- priority: 5 --> 2
- status: open --> closed-fixed
OriginalBugID: 3889 Bug
Version: 8.3b1
SubmitDate: '1999-12-15'
LastModified: '2000-01-20'
Severity: MED
Status: Released
Submitter: techsupp
ChangedBy: hobbs
OS: All
FixedDate: '2000-01-20'
FixedInVersion: 8.3
ClosedDate: '2000-10-25'
Name:
Peter Spjuth
ObservedBehavior:
In TclCompileSetCmd there is a construct that allows lines like:
set arr($a) 1
to be compiled to more efficient byte codes than a general
array assignment.
The rules for it to do this are that the variable name should
consist of 4 tokens like this:
Token 1 shall be text, ending with a "(".
Token 2 and 3 can be anything. (A simple variable reference
gives two tokens.)
Token 4 shall be text, the exact string ")".
These rules create some bugs. Commands like the following
(not very common, but allowed) will not work:
set arr(a($a) 1
set arr(\w\w) 1
set arr([command arg][command arg]) 1
The first will set the array "arr(a" instead of the correct "arr",
since there are no check for extra parentheses in token 1.
The second and third will fail since they produce tokens that fit the
rules, but later code assumes token 2 shall be a variable token.
(token 2 and 3 are backslash tokens in the second case and command
tokens in the third)
So, to be secure, the rules needs to be changed to:
Token 1 shall be text, ending with a "(" and with no other "(".
Token 2 shall be a variable, with one subtoken.
Token 3 can be anything.
Token 4 shall be text, the exact string ")".
That was the errors. Now over to the reason I started looking at
this in the first place: optimisation.
Many common array assignments could be compiled to the more
efficient byte codes. For example:
set arr($a,apa) 1
set arr([lindex opts 2]) 1
set arr(apa,$a,$b) 1
By changing the rules to:
If there are n tokens
Token 1 shall be text, containing a "(".
Token n shall be text, ending with ")".
No character between the parentheses may be a double quote.
With this, almost every common array assignment will benfit.
I must admit that this feels a little risky, but I have tested
it thouroughly, and I have given it a lot of thought and have
not been able to construct anything that would make this fail.
Included is one patch that just fixes the bug, and one patch that
changes all the way.
I have also made a test case where a lot of common array constructs
plus constructs that I have identified as a problem are tested.
Patch:
Index: tclCompCmds.c
===================================================================
RCS file: /cvsroot/tcl/generic/tclCompCmds.c,v
retrieving revision 1.4
diff -c -r1.4 tclCompCmds.c
*** tclCompCmds.c 1999/10/29 03:04:00 1.4
--- tclCompCmds.c 1999/12/15 22:10:51
***************
*** 1650,1663 ****
} else if ((varTokenPtr->numComponents == 4)
&& (varTokenPtr[1].type == TCL_TOKEN_TEXT)
&& (varTokenPtr[1].start[varTokenPtr[1].size-1] == '(')
&& (varTokenPtr[4].type == TCL_TOKEN_TEXT)
&& (varTokenPtr[4].size == 1)
&& (varTokenPtr[4].start[0] == ')')) {
simpleVarName = 1;
! name = varTokenPtr[1].start;
! nameChars = varTokenPtr[1].size - 1;
! elName = varTokenPtr[2].start;
! elNameChars = varTokenPtr[2].size;
}
if (simpleVarName) {
--- 1650,1677 ----
} else if ((varTokenPtr->numComponents == 4)
&& (varTokenPtr[1].type == TCL_TOKEN_TEXT)
&& (varTokenPtr[1].start[varTokenPtr[1].size-1] == '(')
+ && (varTokenPtr[2].type == TCL_TOKEN_VARIABLE)
+ && (varTokenPtr[2].numComponents == 1)
&& (varTokenPtr[4].type == TCL_TOKEN_TEXT)
&& (varTokenPtr[4].size == 1)
&& (varTokenPtr[4].start[0] == ')')) {
simpleVarName = 1;
!
! /* Check for parentheses inside first token */
! for (i = 0, p = varTokenPtr[1].start;
! i < (varTokenPtr[1].size - 1); i++, p++) {
! if (*p == '(') {
! simpleVarName = 0;
! break;
! }
! }
!
! if (simpleVarName) {
! name = varTokenPtr[1].start;
! nameChars = varTokenPtr[1].size - 1;
! elName = varTokenPtr[2].start;
! elNameChars = varTokenPtr[2].size;
! }
}
if (simpleVarName) {
Index: tclCompCmds.c
===================================================================
RCS file: /cvsroot/tcl/generic/tclCompCmds.c,v
retrieving revision 1.4
diff -c -r1.4 tclCompCmds.c
*** tclCompCmds.c 1999/10/29 03:04:00 1.4
--- tclCompCmds.c 1999/12/15 22:12:08
***************
*** 1579,1585 ****
register char *p;
char *name, *elName;
int nameChars, elNameChars;
! register int i;
int isAssignment, simpleVarName, localIndex, numWords;
int maxDepth = 0;
int code = TCL_OK;
--- 1579,1585 ----
register char *p;
char *name, *elName;
int nameChars, elNameChars;
! register int i, n;
int isAssignment, simpleVarName, localIndex, numWords;
int maxDepth = 0;
int code = TCL_OK;
***************
*** 1647,1663 ****
break;
}
}
! } else if ((varTokenPtr->numComponents == 4)
&& (varTokenPtr[1].type == TCL_TOKEN_TEXT)
! && (varTokenPtr[1].start[varTokenPtr[1].size-1] == '(')
! && (varTokenPtr[4].type == TCL_TOKEN_TEXT)
! && (varTokenPtr[4].size == 1)
! && (varTokenPtr[4].start[0] == ')')) {
! simpleVarName = 1;
! name = varTokenPtr[1].start;
! nameChars = varTokenPtr[1].size - 1;
! elName = varTokenPtr[2].start;
! elNameChars = varTokenPtr[2].size;
}
if (simpleVarName) {
--- 1647,1685 ----
break;
}
}
! } else if (((n = varTokenPtr->numComponents) > 1)
&& (varTokenPtr[1].type == TCL_TOKEN_TEXT)
! && (varTokenPtr[n].type == TCL_TOKEN_TEXT)
! && (varTokenPtr[n].start[varTokenPtr[n].size - 1] == ')')) {
! simpleVarName = 0;
!
! /* Check for parentheses inside first token */
! for (i = 0, p = varTokenPtr[1].start;
! i < varTokenPtr[1].size; i++, p++) {
! if (*p == '(') {
! simpleVarName = 1;
! break;
! }
! }
! if (simpleVarName) {
! name = varTokenPtr[1].start;
! nameChars = p - varTokenPtr[1].start;
! elName = p + 1;
! elNameChars = (varTokenPtr[n].start - p) + varTokenPtr[n].size - 2;
!
! /*
! * If elName contains any double quotes ("), we can't inline
! * compile the element script using the replace '()' by '"'
! * technique below.
! */
!
! for (i = 0, p = elName; i < elNameChars; i++, p++) {
! if (*p == '"') {
! simpleVarName = 0;
! break;
! }
! }
! }
}
if (simpleVarName) {
===================================================================
RCS file: /cvsroot/tcl/tests/set.test,v
retrieving revision 1.6
diff -c -r1.6 set.test
*** set.test 1999/10/29 03:04:37 1.6
--- set.test 1999/12/15 21:32:36
***************
*** 205,210 ****
--- 205,235 ----
catch {unset array}
set {array($foo)} 5
} 5
+ test set-1.26 {TclCompileSetCmd: various array constructs} {
+ # Test all kinds of array constructs that TclCompileSetCmd
+ # may feel inclined to tamper with.
+ proc p {} {
+ set a x
+ set be(hej) 1
+ set be($a) 1
+ set {be($a)} 1
+ set be($a,hej) 1
+ set be($a,$a) 5
+ set be(c($a) 1
+ set be(\w\w) 1
+ set be(a:$a) [set be(x,$a)]
+ set be(hej,$be($a,hej),hej) 1
+ set be([string range hugge 0 2]) 1
+ set be(a\ a) 1
+ set be($a\ ,[string range hugge 1 3],hej) 1
+ set be($a,h"ej) 1
+ set be([string range "a b c" 2 end]) 1
+ set [string range bet 0 1](foo) 1
+ set be([set be(a:$a)][set b\e($a)]) 1
+ return [lsort [array names be]]
+ }
+ p
+ } [lsort {hej x $a x,hej x,x c(x ww a:x hej,1,hej hug {a a} {x ,ugg,hej} x,h"ej {b c} foo 51}]
test set-2.1 {set command: runtime error, bad variable name} {
list [catch {set {"foo}} msg] $msg $errorInfo
PatchFiles:
tclCompCmds.c
set.test
took latter (fix + optimisation) patch for 8.3.0.
-- 01/20/2000 hobbs