[Lxr-commits] CVS: lxr/lib/LXR/Lang generic.conf,1.39,1.40

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/lxr/lxr/lib/LXR/Lang
In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv10665/lib/LXR/Lang

Modified Files:
	generic.conf 
Log Message:
generic.conf: improved recognition of the 'include' keywords for HTML, Java, Make, Pascal, PHP, Python and Ruby

Also, various Perl syntax optimisations

Index: generic.conf
===================================================================
RCS file: /cvsroot/lxr/lxr/lib/LXR/Lang/generic.conf,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -d -r1.39 -r1.40

--- generic.conf	19 Apr 2013 12:42:14 -0000	1.39
+++ generic.conf	24 Sep 2013 10:10:10 -0000	1.40
@@ -8,19 +8,19 @@
 
 	# Options to always feed to ectags
 	'ectagsopts' =>
-		[ "--options=" . $config->ectagsconf
-		, "--c-types=+plx"
-		, "--eiffel-types=+l"
-		, "--fortran-types=+L",
+		[ '--options=' . $config->{'ectagsconf'}
+		, '--c-types=+plx'
+		, '--eiffel-types=+l'
+		, '--fortran-types=+L'
 		]
 
 	# How to map a language name to the ectags language-force name
 	# if there is no mapping, then the language name is used
 ,	'eclangnamemapping' =>
-		{ 'C'      => 'c'		# not necessary as ctags
-		, 'C++'    => 'c++'		# seems to take language
-		, 'Python' => 'python'	# names case-insensitive
-		, 'SQL'    => 'SQL2'	# -- ctags processing replaced by regexp
+# 		{ 'C'      => 'c'		# not necessary as ctags
+# 		, 'C++'    => 'c++'		# seems to take language
+# 		, 'Python' => 'python'	# names case-insensitive
+		{ 'SQL'    => 'SQL2'	# -- ctags processing replaced by regexp
 								#    (see ectags.conf)
 			# NOTE: language description is not case-insensitive and
 			#		makes assumptions about layout of declarations.
@@ -110,8 +110,8 @@
 			[ { 'comment'	=> [ '/\*',         '\*/'] }
 			# Does not address "line comment" since syntax is target-specific
 			, { 'string'	=> [ '"',           '"',   '\\\\.' ] }
-			, { 'string'	=> [ "'\\\\?." ] }
-			, { 'include'	=> [ '#\s*include', "\$" ] }
+			, { 'string'	=> [ "'\\\\?.'" ] }
+			, { 'include'	=> [ '#\s*include\b', '$' ] }
 			]
 		, 'typemap' =>
 			{ 'd' => 'define'
@@ -172,11 +172,11 @@
 # 			bindtextdomain	dcgettext	dcngettext
 # 		--- End of built-in list ---
 		, 'spec' =>
-			[ { 'comment'	=> [ '#',         "\$" ] }
+			[ { 'comment'	=> [ '#',         '$' ] }
 			, { 'string'	=> [ '"',         '"',   '\\\\.' ] }
 			, { 'string'	=> [ '/',         '/',   '\\\\.' ] }
 				# for regexps in fact
-			, { 'include'	=> [ '@include', "\$" ] }
+			, { 'include'	=> [ '@include\b', '$' ] }
 			]
 		, 'include' =>
 			{ 'directive' => '(@include)(\s+)(")((?:\\\\.|.)+)(")'
@@ -235,10 +235,10 @@
 						) ]
 		, 'spec' =>
 			[ { 'comment'	=> [ '/\*',         '\*/'] }
-			, { 'comment'	=> [ '//',          "\$" ] }
+			, { 'comment'	=> [ '//',          '$' ] }
 			, { 'string'	=> [ '"',           '"',   '\\\\.' ] }
 			, { 'string'	=> [ "'",           "'",   "\\\\." ] }
-			, { 'include'	=> [ '#\s*include', "\$" ] }
+			, { 'include'	=> [ '#\s*include\b', '$' ] }
 			]
 		,	'typemap' =>
 			{ 'c' => 'class'
@@ -293,10 +293,10 @@
 						) ]
 		, 'spec' =>
 				[ { 'comment'	=> [ '/\*',         '\*/'] }
-				, { 'comment'	=> [ '//',          "\$" ] }
+				, { 'comment'	=> [ '//',          '$' ] }
 				, { 'string'	=> [ '"',           '"',   '\\\\.' ] }
 				, { 'string'	=> [ "'",           "'",   "\\\\." ] }
-				, { 'include'	=> [ '#\s*include', "\$" ] }
+				, { 'include'	=> [ '#\s*include\b', '$' ] }
 				]
 		, 'typemap' =>
 			{ 'c' => 'class'
@@ -471,7 +471,7 @@
 			ZERO ZEROES ZEROS
 						) ]
 		, 'spec' =>
-			[ { 'comment'	=> [ '^[\d ]*\*', '$' ] }
+			[ { 'comment'	=> [ '^[\d ]*\\*', '$' ] }
 			, { 'string'	=> [ '"',         '"' ] }
 			]
 		, 'typemap' =>
@@ -592,7 +592,7 @@
 		}
 	,
 		'HTML' =>		# HTML 4.01 only
-		{ 'identdef' => '[a-zA-Z][\w]*'
+		{ 'identdef' => '[a-zA-Z]\w*'
 		, 'flags' => [ 'case_insensitive' ]
 		, 'reserved' => [ qw(
 			a        abbr    acronym  address applet
@@ -628,19 +628,12 @@
 			[ { 'comment'	=> [ '<!--', '-->'] }
 			, { 'string' 	=> [ '"',    '"' ] }
 			, { 'string' 	=> [ "'",    "'" ] }
-			, { 'include'	=> [ '(?:href|longdesc|src)="', '"' ] }
-			, { 'include'	=> [ "(?:href|longdesc|src)='", "'" ] }
+			, { 'include'	=> [ '\b(?:href|longdesc|src)="', '"' ] }
+			, { 'include'	=> [ "\b(?:href|longdesc|src)='", "'" ] }
 		# The next one for HTML character entity, but UNSAFE
 		# because & can be found in unprotected query strings
-			, { 'string' 	=> [ "&",    ";" ] }
+			, { 'string' 	=> [ '&',    ';' ] }
 			]
-		, 'include' =>
-			{ 'directive' => '(\w+=)()("|\')(.+)("|\')'
-			, 'global' =>	[ '&quot;', '"'
-							, '&#34',      '"', '&#39',      "'"
-							, '(?i)&#x22', '"', '(?i)&#x27', "'"
-							]
-			}
 		, 'typemap'  =>
 			{ 'a' => 'named anchor'
 			, 'f' => 'JavaScript function'
@@ -676,11 +669,11 @@
 						)]
 		, 'spec' =>
 			[ { 'comment'	=> [ '/\*',     '\*/'] }
-			, { 'comment'	=> [ '//',      "\$" ] }
+			, { 'comment'	=> [ '//',      '$' ] }
 			, { 'string' 	=> [ '"',       '"', '\\\\.' ] }
 			, { 'string' 	=> [ "'",       "'", "\\\\." ] }
-			, { 'include'	=> [ 'import',  ";" ] }
-			, { 'include'	=> [ 'package', ";" ] }
+			, { 'include'	=> [ '\bimport\b',  ';' ] }
+			, { 'include'	=> [ '\bpackage\b', ';' ] }
 			]
 		, 'typemap' =>
 			{ 'c' => 'class'
@@ -728,7 +721,7 @@
 						) ]
 		, 'spec' =>
 			[ { 'comment'	=> [ '/\*',     '\*/'] }
-			, { 'comment'	=> [ '//',      "\$" ] }
+			, { 'comment'	=> [ '//',      '$' ] }
 			, { 'string' 	=> [ '"',       '"', '\\\\.' ] }
 			, { 'string' 	=> [ "'",       "'", "\\\\." ] }
 			]
@@ -784,10 +777,10 @@
 			call      eval      value
 						) ]
 		, 'spec'     =>
-			[ { 'comment'	=> [ '#', "\$"] }
+			[ { 'comment'	=> [ '#', '$'] }
 			, { 'string'	=> [ '"', '"' ] }
 			, { 'string'	=> [ "'", "'" ] }
-			, { 'include'	=> [ '[s-]?include\s', '\$'] }
+			, { 'include'	=> [ '(\bs|-)?include\s', '$'] }
 			]
 	# Multiple inclusion is handled in the specific parser.
 		, 'typemap' => { 'm' => 'macro' }
@@ -847,12 +840,8 @@
 			, { 'comment'	=> [ '\\(\\*', '\\*\\)' ] }
 			, { 'comment'	=> [ '//', '$' ] }
 			, { 'string'	=> [ "'", "'" ] }
-			, { 'include'	=> [ 'uses', ';' ] }
+			, { 'include'	=> [ '\buses\b', ';' ] }
 			]
-		, 'include' =>
-			{ 'directive' => '([\w]+)(\s+)()([\w]+)()'
-			, 'post'   => [ '\$', '.pas' ]
-			}
 		, 'typemap' =>
 			{ 'f' => 'function'
 			, 'p' => 'procedure'
@@ -952,31 +941,33 @@
 			[ { 'atom'	=>	 '(?:\$#?\w+'
 							.'|\\\\.'
 							.'|\b(s|tr|y)\b\s*'
-								.'(?:(?:\{(?:\\\\.|.)*?\}){2}'
-								.'|(?:\[(?:\\\\.|.)*?\]}){2}'
-								.'|(?:\((?:\\\\.|.)*?\)){2}'
-								.'|(?:\<(?:\\\\.|.)*?\>){2}'
+								.'(?:(?:\\{(?:\\\\.|.)*?\\}){2}'
+								.'|  (?:\\[(?:\\\\.|.)*?\\]}){2}'
+								.'|  (?:\\((?:\\\\.|.)*?\\)){2}'
+								.'|  (?:\\<(?:\\\\.|.)*?\\>){2}'
 								.'|(.)(?:(?:\\\\.|.)*?\g{-1}){2}'
 								.')'
 							.'|\b(q[qrwx]?|m)\b\s*'
-								.'(?:\{(?:\\\\.|.)*?\}'
-								.'|\[(?:\\\\.|.)*?\]'
-								.'|\((?:\\\\.|.)*?\)'
-								.'|\<(?:\\\\.|.)*?\>'
-								.'|(.)(?:\\\\.|.)*?\g{-1}'
+								.'(?:\\{(?:\\\\.|.)*?\\}'
+								.'|  \\[(?:\\\\.|.)*?\\]'
+								.'|  \\((?:\\\\.|.)*?\\)'
+								.'|  \\<(?:\\\\.|.)*?\\>'
+								.'|(.)(?:\\\\.|.)*?\\g{-1}'
 								.')'
 							.')' }
 	# NOTE:	This complicated 'atom' is an attempt not to lose control
 	#		through quote and regexp operators. It is not 100%
 	#		bullet-proof. It can't handle nested m{ {...} } for instance.
+	# NOTE 2: It does not work if the regular expressions and/or replacements
+	#		extend over several lines (as is frequently the case with the /x modifier).
 
 			, { 'include'	=> ['\buse\s+',     ';'] }
 			, { 'include'	=> ['\brequire\s+', ';'] }
 			, { 'string'	=> ['"',            '"',      '\\\\.'] }
 			, { 'string'	=> ["'",            "'",      "\\\\."] }
 			, { 'string'	=> ['\\`',          '\\`',    '\\\\.'] }
-			, { 'comment'	=> ['#',            "\$"] }
-			, { 'comment'	=> ["^=\\w+",       "^=cut"] }
+			, { 'comment'	=> ['#',            '$'] }
+			, { 'comment'	=> ['^=\w+',       '^=cut'] }
 			]
 	# The following include rules are more efficiently implemented
 	# in the Perl.pm module. They are shown as an example of
@@ -1056,17 +1047,17 @@
 			self      parent
 						) ]
 		, 'spec' =>
-			[ { 'comment'	=> [ '/\\*',         '\*/' ] }
-			, { 'comment'	=> [ '//',           "\$" ] }
-			, { 'comment'	=> [ '#',            "\$" ] }
+			[ { 'comment'	=> [ '/\*',         '\*/' ] }
+			, { 'comment'	=> [ '//',           '$' ] }
+			, { 'comment'	=> [ '#',            '$' ] }
 			# TODO: Heredoc and Nowdoc if it makes sense
 			#		to consider them comments
 			, { 'string'	=> [ '"',            '"', '\\\\.' ] }
 			, { 'string'	=> [ "'",            "'", '\\\\.' ] }
-			, { 'include'	=> [ 'require',      "\$" ] }
-			, { 'include'	=> [ 'include',      "\$" ] }
-			, { 'include'	=> [ 'require_once', "\$" ] }
- 			, { 'include'	=> [ 'include_once', "\$" ] }
+			, { 'include'	=> [ '\brequire\b',      '$' ] }
+			, { 'include'	=> [ '\binclude\b',      '$' ] }
+			, { 'include'	=> [ '\brequire_once\b', '$' ] }
+ 			, { 'include'	=> [ '\binclude_once\b', '$' ] }
 			],
 		, 'include' =>
 			{ 'directive' => '([\w]+)(\s*)(\\(\\s*[\'"])([^\'"]+)("\\s*\\))'
@@ -1104,13 +1095,13 @@
 			yield
 						) ]
 		, 'spec' =>
-			[ { 'comment'	=> [ '#',   "\$" ] }
+			[ { 'comment'	=> [ '#',   '$' ] }
 			, { 'string'	=> [ '"""', '"""', '\\\\.' ] }
 			, { 'string'	=> [ "'''", "'''", "\\\\." ] }
 			, { 'string'	=> [ '"',   '"',   '\\\\.' ] }
 			, { 'string'	=> [ "'",   "'",   "\\\\." ] }
-			, { 'include'	=> [ '\bimport\b', "\$" ] }
-			, { 'include'	=> [ '\bfrom\b',   "\$" ] }
+			, { 'include'	=> [ '\bimport\b', '$' ] }
+			, { 'include'	=> [ '\bfrom\b',   '$' ] }
 			]
 	# Include rules implemented in Python.pm to cope with an
 	# endlessly looping case under 'include' patterns.
@@ -1156,8 +1147,8 @@
 			__FILE__ __LINE__
 						) ]
 		, 'spec' =>
-			[ { 'comment'	=> [ '^=begin',  '^=end'] }
-			, { 'comment'	=> [ '#',       "\$"] }
+			[ { 'comment'	=> [ '^=begin\b',  '^=end\b'] }
+			, { 'comment'	=> [ '#',       '$'] }
 			, { 'string' 	=> [ '"',       '"'
 					, '\\\\(?:(?:[CM]-|c)[^\\\\]?|\$|.)' ] }
 			, { 'string' 	=> [ "'",       "'"
@@ -1165,17 +1156,16 @@
 			, { 'string' 	=> [ '\\`',     '\\`'
 					, '\\\\(?:(?:[CM]-|c)[^\\\\]?|\$|.)' ] }
 			# The following def does not handle nested construct
-			, { 'string' 	=>	[ '\%[qQrs]?'
-								. '(?:\\([^(]*?\\)'
-								. '|\\[[^(]*?\\]'
-								. '|\\{[^(]*?\\}'
-								. '|\\<[^(]*?\\>'
- 								. '|([\W]).*?\g{-1}'
-								. ')'
-							   ] }
+			, { 'string' 	=> [ '\%[iIqQrswW]?\\(', '\\)', '\\\\.' ] }
+			, { 'string' 	=> [ '\%[iIqQrswW]?\\[', '\\]', '\\\\.' ] }
+			, { 'string' 	=> [ '\%[iIqQrswW]?\\{', '\\}', '\\\\.' ] }
+			, { 'string' 	=> [ '\%[iIqQrswW]?\\<', '\\>', '\\\\.' ] }
+			, { 'string' 	=> [ '\%[iIqQrswW]?'
+								. '([\W]).*?\g{-1}'
+								] }
 # 			, { 'string' 	=> [ "(?<!:):(?:[!%^&*/|+-]|[@$]?[A-Za-z_]+?)", '[A-Za-z_0-9!?=]*(?![A-Za-z_0-9!?=])' ] }
-			, { 'include'	=> [ 'require', "\$" ] }
-			, { 'include'	=> [ 'load',    "\$" ] }
+			, { 'include'	=> [ '\brequire\b', '$' ] }
+			, { 'include'	=> [ '\bload\b',    '$' ] }
 			]
 		, 'typemap' =>
 			{ 'c' => 'class'
@@ -1376,8 +1366,8 @@
 						) ]
 		, 'spec' =>
 			[ { 'comment'	=> [ '/\*', '\*/' ] }
-			, { 'comment'	=> [ '//',  "\$" ] }
-			, { 'comment'	=> [ '--',  "\$" ] }
+			, { 'comment'	=> [ '//',  '$' ] }
+			, { 'comment'	=> [ '--',  '$' ] }
 			, { 'string'	=> [  '"',   '"',   '\\\\.' ] }
 			, { 'string'	=> [  "'",   "'",   "\\\\." ] }
 			]
@@ -1563,9 +1553,9 @@
 			, 'Xor'
 			]
 		, 'spec' =>
-			[ { 'comment'	=> [ 'rem ', '\$' ] }
+			[ { 'comment'	=> [ '\brem\b', '$' ] }
 			, { 'comment'	=> [ "'",    "\$" ] }
-			, { 'string'	=> [  '"',   '"',  '\\\\.' ] }
+			, { 'string'	=> [ '"',    '"',  '\\\\.' ] }
 			, { 'string'	=> [ "'",    "'",  "\\\\." ] }
 			]
 		, 'typemap' =>