Perform faster macro concatenation

Previously, concatenation was treated as a binary operator, taking two values from the stack and concatenating them. Where an expression involved more than one concatenation, each application of the binary operator created a temporary string, requiring a separate allocation, only for that string to be discarded later. To make matters worse, each concatenation rescanned both of its operand strings with strlen() to determine their lengths.

This modification changes the way the concatenation operator is handled. Instead of being treated as a binary operator, it is treated as a list operator, rather like the comma in an argument list. Thus a concatenation expression is treated as a counted sequence of subexpressions. The count is coded into the "bytecode" to tell the concat() function (in interpret.c) how many expression values to pull off the stack. The function then exploits the fact that string DataValue structures now include the strings' lengths to work out the total allocation length required for the whole result. (This is based on the previous implementation of makeArrayKeyFromArgs().)

I have also added the helper functions longAsStr() and lenLongAsStr() to generate the decimal string representation of a number, or just to work out its length. This allows us to avoid calling sprintf() with "%d" all the time. The new AllocStringOfNumber() exploits this, and is used in various places in interpret.c. Note that longAsStr() returns the string value in a static buffer, which must be used or copied immediately, because a second call overwrites its content.
diff -ur nedit_official nedit_mod diff -ur nedit_official/source/interpret.c nedit_mod/source/interpret.c --- nedit_official/source/interpret.c 2004-05-12 05:21:40.000000000 -0400 +++ nedit_mod/source/interpret.c 2004-06-11 18:07:32.510305000 -0400 @@ -110,6 +110,8 @@ static int or(void); static int not(void); static int power(void); +static int concatenateNwithSep(int nVals, const char *sep, char **result, + int leaveParams); static int concat(void); static int assign(void); static int callSubroutine(void); @@ -781,6 +783,62 @@ } /* +** Convert a long value to its decimal string representation, returned in a +** static string. +*/ +const char *longAsStr(long val) +{ + static const char digits[] = "0123456789"; + static char res[TYPE_INT_STR_SIZE(val) + 1]; + long val10; + char *pos = &res[TYPE_INT_STR_SIZE(val)]; + + /* the string is built backwards, so start by null terminating it */ + *pos = 0; + + if (val >= 0) { + /* do-while loop will deal with the val == 0 case */ + do { + /* we can use the modulo (%) operator here */ + *--pos = digits[val % 10]; + val /= 10; + } while (val != 0); + } + else { + /* we don't use the modulo (%) operator since its behaviour with + negative numbers is undefined by the C standards */ + do { + val10 = val / 10; + *--pos = digits[(10 * val10) - val]; + val = val10; + } while (val != 0); + *--pos = '-'; + } + return pos; +} + +/* +** Calculate the length of the decimal string representation of a long value. +*/ +int lenLongAsStr(long val) +{ + long val10; + int len = 0; + + if (val < 0) { + len++; /* for leading '-' */ + } + + /* do-while loop will deal with the val == 0 case */ + do { + len++; + val /= 10; + } while (val != 0); + + return len; +} + +/* ** Allocate memory for a string, and keep track of it, such that it ** can be recovered later using GarbageCollectStrings. (A linked list ** of pointers is maintained by threading through the memory behind @@ -871,6 +929,12 @@ return AllocStringNCpy(s, s ? 
strlen(s) : 0); } +/* Allocate a string holding the decimal value of the number */ +char *AllocStringOfNumber(long val) +{ + return AllocStringCpy(longAsStr(val)); +} + /* * Allocate a new NString buffer, containing a copy of the given string. * The length is set to the length of the string and resulting string is @@ -1068,8 +1132,7 @@ return execError(StackUnderflowMsg, ""); \ --StackP; \ if (StackP->tag == INT_TAG) { \ - string = AllocString(TYPE_INT_STR_SIZE(int)); \ - sprintf(string, "%d", StackP->val.n); \ + string = AllocStringOfNumber(StackP->val.n); \ } else if (StackP->tag == STRING_TAG) \ string = StackP->val.str.rep; \ else \ @@ -1077,8 +1140,7 @@ #define PEEK_STRING(string, peekIndex) \ if ((StackP - peekIndex - 1)->tag == INT_TAG) { \ - string = AllocString(TYPE_INT_STR_SIZE(int)); \ - sprintf(string, "%d", (StackP - peekIndex - 1)->val.n); \ + string = AllocStringOfNumber((StackP - peekIndex - 1)->val.n); \ } \ else if ((StackP - peekIndex - 1)->tag == STRING_TAG) { \ string = (StackP - peekIndex - 1)->val.str; \ @@ -1195,9 +1257,8 @@ --argNum; nArgs = FP_GET_ARG_COUNT(FrameP); if (argNum >= nArgs || argNum < 0) { - char argStr[TYPE_INT_STR_SIZE(argNum)]; - sprintf(argStr, "%d", argNum + 1); - return execError("referenced undefined argument: $args[%s]", argStr); + return execError("referenced undefined argument: $args[%s]", + longAsStr(argNum + 1)); } PUSH(FP_GET_ARG_N(FrameP, argNum)); return STAT_OK; @@ -1227,11 +1288,9 @@ resultArray->val.arrayPtr = ArrayNew(); for (argNum = 0; argNum < nArgs; ++argNum) { - char intStr[TYPE_INT_STR_SIZE(argNum)]; - - sprintf(intStr, "%d", argNum + 1); argVal = FP_GET_ARG_N(FrameP, argNum); - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) { + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1), + &argVal)) { return(execError("array insertion failure", NULL)); } } @@ -1822,26 +1881,102 @@ } /* -** concatenate two top items on the stack -** Before: Stack-> str2, str1, next, ... 
-** After: Stack-> result, next, ... +** A helper routine used in concat(), and makeArrayKeyFromArgs(). +** Concatenate a number of values from the stack and return the result as a +** character pointer in *result and its length as the return value, or less +** than zero on failure. If a divider is specified, add it between each of the +** stack elements. The stack elements are popped from the stack if leaveParams +** is false. +*/ +static int concatenateNwithSep(int nVals, const char *sep, char **result, + int leaveParams) +{ + int errNum; + DataValue value; + char *res = NULL; + char *pos; + int len; + int sepLen; + int i; + + *result = NULL; + + if (sep == NULL) { + sep = ""; + } + sepLen = strlen(sep); + + /* evaluate total length (upper limit) */ + len = sepLen * (nVals - 1); + for (i = nVals - 1; i >= 0; --i) { + PEEK(value, i) + if (value.tag == INT_TAG) { + len += lenLongAsStr(value.val.n); + } + else if (value.tag == STRING_TAG) { + len += value.val.str.len; + } + else { + return -1; /* invalid type */ + } + } + + /* allocate the string */ + res = AllocString(len + 1); + pos = res; + /* write everything into the result */ + for (i = nVals - 1; i >= 0; --i) { + PEEK(value, i) + if (value.tag == INT_TAG) { + pos += strlen(strcpy(pos, longAsStr(value.val.n))); + } + else { /* value.tag == STRING_TAG */ + strcpy(pos, value.val.str.rep); + pos += value.val.str.len; + } + if (i && sepLen) { + strcpy(pos, sep); + pos += sepLen; + } + } + /* remove the source expression values */ + if (!leaveParams) { + while (nVals--) { + POP(value) + } + } + + /* now return the results */ + *result = res; + return pos - res; +} + +/* +** concatenate a number of strings and push the result onto the stack +** +** Before: Prog-> [nExpr], next, ... +** Stack-> exprValN, ... exprVal1, next, ... +** After: Prog-> nExpr, [next], ... +** Stack-> concatResult, next, ... 
*/ static int concat(void) { - char *s1, *s2, *out; - int len1, len2; + char *out; + int len; - DISASM_RT(PC-1, 1); - STACKDUMP(2, 3); + int nExpr; + + nExpr = (int)*PC; + PC++; - POP_STRING(s2) - POP_STRING(s1) - len1 = strlen(s1); - len2 = strlen(s2); - out = AllocString(len1 + len2 + 1); - strncpy(out, s1, len1); - strcpy(&out[len1], s2); - PUSH_STRING(out, len1 + len2) + DISASM_RT(PC-2, 2); + STACKDUMP(nExpr+1, 3); + + len = concatenateNwithSep(nExpr, "", &out, False); + if (len < 0) { + return(execError("can only concatenate with string or integer", NULL)); + } + PUSH_STRING(out, len) return STAT_OK; } @@ -2171,45 +2306,11 @@ */ static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams) { - DataValue tmpVal; - int sepLen = strlen(ARRAY_DIM_SEP); - int keyLength = 0; - int i; + int len; - keyLength = sepLen * (nArgs - 1); - for (i = nArgs - 1; i >= 0; --i) { - PEEK(tmpVal, i) - if (tmpVal.tag == INT_TAG) { - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n); - } - else if (tmpVal.tag == STRING_TAG) { - keyLength += tmpVal.val.str.len; - } - else { - return(execError("can only index array with string or int.", NULL)); - } - } - *keyString = AllocString(keyLength + 1); - (*keyString)[0] = 0; - for (i = nArgs - 1; i >= 0; --i) { - if (i != nArgs - 1) { - strcat(*keyString, ARRAY_DIM_SEP); - } - PEEK(tmpVal, i) - if (tmpVal.tag == INT_TAG) { - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n); - } - else if (tmpVal.tag == STRING_TAG) { - strcat(*keyString, tmpVal.val.str.rep); - } - else { - return(execError("can only index array with string or int.", NULL)); - } - } - if (!leaveParams) { - for (i = nArgs - 1; i >= 0; --i) { - POP(tmpVal) - } + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams); + if (len < 0) { + return(execError("can only index array with string or int.", NULL)); } return(STAT_OK); } @@ -2939,6 +3040,11 @@ (int)(&inst[i+1] + (int)inst[i+1])); ++i; } + else if (j == OP_CONCAT) { + 
printd("nExpr=%d", + (int)inst[i+1]); + ++i; + } else if (j == OP_SUBR_CALL) { printf("%s (%d arg)", ((Symbol *)inst[i+1])->name, (int)inst[i+2]); diff -ur nedit_official/source/interpret.h nedit_mod/source/interpret.h --- nedit_official/source/interpret.h 2004-04-30 10:35:16.000000000 -0400 +++ nedit_mod/source/interpret.h 2004-06-11 18:10:18.812165000 -0400 @@ -15,7 +15,7 @@ enum symTypes {CONST_SYM, GLOBAL_SYM, LOCAL_SYM, ARG_SYM, PROC_VALUE_SYM, C_FUNCTION_SYM, MACRO_FUNCTION_SYM, ACTION_ROUTINE_SYM}; -#define N_OPS 43 + enum operations {OP_RETURN_NO_VAL, OP_RETURN, OP_PUSH_SYM, OP_DUP, OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, OP_NEGATE, OP_INCR, OP_DECR, OP_GT, OP_LT, OP_GE, OP_LE, OP_EQ, OP_NE, OP_BIT_AND, OP_BIT_OR, OP_AND, OP_OR, OP_NOT, @@ -23,7 +23,7 @@ OP_BRANCH_TRUE, OP_BRANCH_FALSE, OP_BRANCH_NEVER, OP_ARRAY_REF, OP_ARRAY_ASSIGN, OP_BEGIN_ARRAY_ITER, OP_ARRAY_ITER, OP_IN_ARRAY, OP_ARRAY_DELETE, OP_PUSH_ARRAY_SYM, OP_ARRAY_REF_ASSIGN_SETUP, OP_PUSH_ARG, - OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY}; + OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY, N_OPS}; enum typeTags {NO_TAG, INT_TAG, STRING_TAG, ARRAY_TAG}; @@ -131,6 +131,7 @@ char *AllocString(int length); char *AllocStringNCpy(const char *s, int length); char *AllocStringCpy(const char *s); +char *AllocStringOfNumber(long val); int AllocNString(NString *string, int length); int AllocNStringNCpy(NString *string, const char *s, int length); int AllocNStringCpy(NString *string, const char *s); diff -ur nedit_official/source/parse.y nedit_mod/source/parse.y --- nedit_official/source/parse.y 2003-12-19 18:23:31.000000000 -0500 +++ nedit_mod/source/parse.y 2004-06-11 18:13:18.204213000 -0400 @@ -62,7 +62,7 @@ %token NUMBER STRING SYMBOL %token DELETE ARG_LOOKUP %token IF WHILE ELSE FOR BREAK CONTINUE RETURN -%type arglist +%type arglist catlist %type cond comastmts for while else and or arrayexpr %type evalsym @@ -283,9 +283,16 @@ $$ = $1 + 1; } ; -expr: numexpr %prec CONCAT - | expr numexpr %prec CONCAT { - 
ADD_OP(OP_CONCAT); +catlist: numexpr %prec CONCAT { + $$ = 1; + } + | catlist numexpr %prec CONCAT { + $$ = $1 + 1; + } +expr: catlist { + if ($1 > 1) { + ADD_OP(OP_CONCAT); ADD_IMMED((void *)$1); + } } ; initarraylv: SYMBOL {