From: <eri...@us...> - 2017-04-01 21:42:41
|
Revision: 11224 http://sourceforge.net/p/oorexx/code-0/11224 Author: erich_st Date: 2017-04-01 21:42:34 +0000 (Sat, 01 Apr 2017) Log Message: ----------- code changes for [feature-requests:#639] 'xrange() to support more than one range' Modified Paths: -------------- main/trunk/api/oorexxerrors.h main/trunk/interpreter/RexxClasses/CoreClasses.orx main/trunk/interpreter/classes/IntegerClass.cpp main/trunk/interpreter/classes/StringClass.cpp main/trunk/interpreter/classes/StringClass.hpp main/trunk/interpreter/expression/BuiltinFunctions.cpp main/trunk/interpreter/messages/RexxErrorCodes.h main/trunk/interpreter/messages/RexxMessageNumbers.h main/trunk/interpreter/messages/RexxMessageTable.h main/trunk/interpreter/messages/errnums.xml main/trunk/interpreter/messages/rexxmsg.xml main/trunk/interpreter/platform/unix/gencat.inp main/trunk/interpreter/platform/windows/winmsgtb.rc Modified: main/trunk/api/oorexxerrors.h =================================================================== --- main/trunk/api/oorexxerrors.h 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/api/oorexxerrors.h 2017-04-01 21:42:34 UTC (rev 11224) @@ -368,6 +368,7 @@ #define Rexx_Error_Incorrect_call_binary 40024 #define Rexx_Error_Incorrect_call_hex 40025 #define Rexx_Error_Incorrect_call_symbol 40026 +#define Rexx_Error_Incorrect_call_pad_or_name 40028 #define Rexx_Error_Incorrect_call_list 40904 #define Rexx_Error_Incorrect_call_trace 40905 #define Rexx_Error_Incorrect_call_random 40033 Modified: main/trunk/interpreter/RexxClasses/CoreClasses.orx =================================================================== --- main/trunk/interpreter/RexxClasses/CoreClasses.orx 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/RexxClasses/CoreClasses.orx 2017-04-01 21:42:34 UTC (rev 11224) @@ -1,7 +1,7 @@ /*----------------------------------------------------------------------------*/ /* */ /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. 
*/ -/* Copyright (c) 2005-2014 Rexx Language Association. All rights reserved. */ +/* Copyright (c) 2005-2017 Rexx Language Association. All rights reserved. */ /* */ /* This program and the accompanying materials are made available under */ /* the terms of the Common Public License v1.0 which accompanies this */ @@ -61,10 +61,11 @@ end -- add string class constant methods -.String~defineClassMethod("NL", .methods~string_cls_nl) -.String~defineClassMethod("CR", .methods~string_cls_cr) -.String~defineClassMethod("TAB", .methods~string_cls_tab) -.String~defineClassMethod("NULL", .methods~string_cls_null) +do name over "nl", "cr", "tab", "null", - + "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower", - + "print", "punct", "space", "upper", "xdigit" + .String~defineClassMethod(name~upper, .methods[("string_cls_" || name)~upper]) +end -- Some classes have addtional methods added that are written in Rexx. -- we do a phony inherit to add those directly to the class instance @@ -76,7 +77,7 @@ .bag~inheritInstanceMethods(.ManyItemMixin) -- set has its own set methods .set~inheritInstanceMethods(.SetMixin) --- as does Bat +-- as does Bag .bag~inheritInstanceMethods(.BagMixin) -- now handle real inherits for the primitive classes. 
@@ -141,6 +142,20 @@ use strict arg return "00"x +::method string_cls_alnum; use strict arg; return xrange("alnum") +::method string_cls_alpha; use strict arg; return xrange("alpha") +::method string_cls_blank; use strict arg; return xrange("blank") +::method string_cls_cntrl; use strict arg; return xrange("cntrl") +::method string_cls_digit; use strict arg; return xrange("digit") +::method string_cls_graph; use strict arg; return xrange("graph") +::method string_cls_lower; use strict arg; return xrange("lower") +::method string_cls_print; use strict arg; return xrange("print") +::method string_cls_punct; use strict arg; return xrange("punct") +::method string_cls_space; use strict arg; return xrange("space") +::method string_cls_upper; use strict arg; return xrange("upper") +::method string_cls_xdigit; use strict arg; return xrange("xdigit") + + -- Start of classes that exist only to add collections of methods to -- primitive classes Modified: main/trunk/interpreter/classes/IntegerClass.cpp =================================================================== --- main/trunk/interpreter/classes/IntegerClass.cpp 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/classes/IntegerClass.cpp 2017-04-01 21:42:34 UTC (rev 11224) @@ -691,7 +691,7 @@ } // the product should be a valid integer under the current numeric - // digits; if we know it won't fit, there's no neeed to multiply + // digits; if we know it won't fit, there's no need to multiply // we can estimate this: multiplying an m-bit number with an n-bit // number yields a product of either (m + n - 1) or (m + n) bits // we test (m + n - 1) <= 30 (for 32-bit) and 60 (for 64-bit), @@ -956,7 +956,7 @@ case 2: // we'll evaluate n ^ 2 as n * n // the result should be a valid integer under the current numeric - // digits; if we know it won't fit, there's no neeed to multiply + // digits; if we know it won't fit, there's no need to multiply // we can estimate this: squaring an n-bit number yields a result // of 
either (2n - 1) or (2n) bits // we test (2n - 1) <= 30 (for 32-bit) and 60 (for 64-bit), @@ -1003,7 +1003,7 @@ // no common base or power .. try to do the full calculation // the result should be a valid integer under the current numeric digits - // if we know it won't fit, there's no neeed to try + // if we know it won't fit, there's no need to try // we can estimate the result size: if base has b bits, the result // will require between (b * power - b + 1) and (b * power) bits wholenumber_t maxBits = Numerics::maxBitsForDigits(number_digits()); @@ -1569,7 +1569,7 @@ return numberString()->Max(args, argCount); } } - // return the minimum object + // return the maximum object return maxObject; } Modified: main/trunk/interpreter/classes/StringClass.cpp =================================================================== --- main/trunk/interpreter/classes/StringClass.cpp 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/classes/StringClass.cpp 2017-04-01 21:42:34 UTC (rev 11224) @@ -1,7 +1,7 @@ /*----------------------------------------------------------------------------*/ /* */ /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */ -/* Copyright (c) 2005-2014 Rexx Language Association. All rights reserved. */ +/* Copyright (c) 2005-2017 Rexx Language Association. All rights reserved. 
*/ /* */ /* This program and the accompanying materials are made available under */ /* the terms of the Common Public License v1.0 which accompanies this */ @@ -68,6 +68,36 @@ const char *RexxString::UPPER_ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const char *RexxString::DIGITS_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +/* +https://en.wikipedia.org/wiki/Regular_expression#Character_classes +POSIX character ranges returned by XRANGE() and String methods +[:alnum:] [A-Za-z0-9] Alphanumeric characters +[:alpha:] [A-Za-z] Alphabetic characters +[:blank:] [ \t] Space and tab +[:cntrl:] [\x00-\x1F\x7F] Control characters +[:digit:] [0-9] Digits +[:graph:] [\x21-\x7E] Visible characters +[:lower:] [a-z] Lowercase letters +[:print:] [\x20-\x7E] Visible characters and the space character +[:punct:] [][!"#$%&'()*+,./:;<=>?@\^_`{|}~-] Punctuation characters +[:space:] [ \t\r\n\v\f] Whitespace characters +[:upper:] [A-Z] Uppercase letters +[:xdigit:] [A-Fa-f0-9] Hexadecimal digits +*/ +// the character ranges are returned in ascending byte order +const char *RexxString::ALNUM = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; +const char *RexxString::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; +const char *RexxString::BLANK = "\t "; +const char *RexxString::CNTRL = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"; +const char *RexxString::DIGIT = "0123456789"; +const char *RexxString::GRAPH = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; +const char *RexxString::LOWER = RexxString::LOWER_ALPHA; +const char *RexxString::PRINT = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; +const char *RexxString::PUNCT = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; +const char *RexxString::SPACE = "\t\n\v\f\r "; +const char *RexxString::UPPER 
= RexxString::UPPER_ALPHA; +const char *RexxString::XDIGIT = "0123456789ABCDEFabcdef"; + const char RexxString::ch_PLUS='+'; const char RexxString::ch_MINUS='-'; const char RexxString::ch_PERIOD='.'; Modified: main/trunk/interpreter/classes/StringClass.hpp =================================================================== --- main/trunk/interpreter/classes/StringClass.hpp 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/classes/StringClass.hpp 2017-04-01 21:42:34 UTC (rev 11224) @@ -1,7 +1,7 @@ /*----------------------------------------------------------------------------*/ /* */ /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */ -/* Copyright (c) 2005-2014 Rexx Language Association. All rights reserved. */ +/* Copyright (c) 2005-2017 Rexx Language Association. All rights reserved. */ /* */ /* This program and the accompanying materials are made available under */ /* the terms of the Common Public License v1.0 which accompanies this */ @@ -85,9 +85,11 @@ class StringBuilder { public: + inline StringBuilder() {} inline StringBuilder(char *b) : current(b) {} inline StringBuilder(RexxString *s) : current(s->getWritableData()) {} + inline void init(RexxString *s) { current = s->getWritableData(); } inline void append(const char *d, size_t l) { memcpy(current, d, l); current += l; } inline void append(const char *d) { size_t l = strlen(d); memcpy(current, d, l); current += l; } inline void append(char c) { *current++ = c; } @@ -746,6 +748,21 @@ static const char *UPPER_ALPHA; static const char *DIGITS_BASE64; + // POSIX character ranges returned by XRANGE() and .String class methods + static const char *RexxString::ALNUM; + static const char *RexxString::ALPHA; + static const char *RexxString::BLANK; + static const char *RexxString::CNTRL; + static const char *RexxString::DIGIT; + static const char *RexxString::GRAPH; + static const char *RexxString::LOWER; + static const char *RexxString::PRINT; + static const char *RexxString::PUNCT; + 
static const char *RexxString::SPACE; + static const char *RexxString::UPPER; + static const char *RexxString::XDIGIT; + + protected: HashCode hashValue; // stored has value Modified: main/trunk/interpreter/expression/BuiltinFunctions.cpp =================================================================== --- main/trunk/interpreter/expression/BuiltinFunctions.cpp 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/expression/BuiltinFunctions.cpp 2017-04-01 21:42:34 UTC (rev 11224) @@ -1593,50 +1593,159 @@ BUILTIN(XRANGE) { const size_t XRANGE_Min = 0; - const size_t XRANGE_Max = 2; - const size_t XRANGE_start = 1; - const size_t XRANGE_end = 2; + const size_t XRANGE_Max = argcount; fix_args(XRANGE); - // default start and end positions are the full range - char startchar = 0; - char endchar = (char)0xff; + char startchar, endchar; // start and end positions + typedef enum {START_END, CHAR_CLASS} arg_t; + arg_t argumentType; // character class or start/end range - RexxString *start = optional_string(XRANGE, start); - RexxString *end = optional_string(XRANGE, end); + RexxString *first, *second, *result; + RexxString::StringBuilder result_builder; + size_t length, totalLength = 0; + size_t XRANGE_arg; + const char *characterClass; - // validate the starts and end - if (start != OREF_NULL) + // we will need to know the total length of the result string + // before we can begin to build it + // if there are more than one or two args, we'll have to step through + // all our args twice: first, to count the total string length, and + // a second time to append all the pieces together + typedef enum {CALC_LENGTH, BUILD_STRING} work_t; + work_t mode = CALC_LENGTH; // step one: calculate total length + + for (size_t loops = 1; loops <= 2; loops++) { - // must be just a single character - if (start->getLength() != 1) + XRANGE_arg = 0; + // we want to enter our loop even if argcount is zero + while (XRANGE_arg == 0 || XRANGE_arg < argcount) { - 
reportException(Error_Incorrect_call_pad, "XRANGE", IntegerOne, start); + // default start and end positions are the full range + startchar = 0; + endchar = (char)0xff; + argumentType = START_END; + + // for each loop, we can accept either: + // - no args + // - first arg length 1, no second arg: a start byte only + // - first arg length larger than 1, no second arg: a character class + // - no first arg, second arg length 1: an end byte only + // - first arg length 1, second arg length 1: both a start and an end byte + + XRANGE_arg++; + if ((first = optional_string(XRANGE, arg)) != OREF_NULL) + { + // must be a single character or a character class name + if (first->getLength() == 1) + { + // single character means a start byte + startchar = first->getChar(0); + } + else + { + // must be a character class name + argumentType = CHAR_CLASS; + if (first->strCaselessCompare("alnum")) characterClass = RexxString::ALNUM; + else if (first->strCaselessCompare("alpha")) characterClass = RexxString::ALPHA; + else if (first->strCaselessCompare("blank")) characterClass = RexxString::BLANK; + else if (first->strCaselessCompare("cntrl")) characterClass = RexxString::CNTRL; + else if (first->strCaselessCompare("digit")) characterClass = RexxString::DIGIT; + else if (first->strCaselessCompare("graph")) characterClass = RexxString::GRAPH; + else if (first->strCaselessCompare("lower")) characterClass = RexxString::LOWER; + else if (first->strCaselessCompare("print")) characterClass = RexxString::PRINT; + else if (first->strCaselessCompare("punct")) characterClass = RexxString::PUNCT; + else if (first->strCaselessCompare("space")) characterClass = RexxString::SPACE; + else if (first->strCaselessCompare("upper")) characterClass = RexxString::UPPER; + else if (first->strCaselessCompare("xdigit")) characterClass = RexxString::XDIGIT; + else reportException(Error_Incorrect_call_pad_or_name, "XRANGE", new_integer(XRANGE_arg), first); + + } + } + if (argumentType == CHAR_CLASS) + { + // 
CNTRL contains a leading NUL character, so we calculate length here + length = 1 + strlen(characterClass + 1); + + // just one character class arg? we can finish this early + if (mode == CALC_LENGTH && argcount == 1) + { + return new_string(characterClass, length); + } + else if (mode == CALC_LENGTH) + { + totalLength += length; + } + else // mode == BUILD_STRING + { + result_builder.append(characterClass, length); + } + + // if this was a character class, we won't have a second arg + continue; + } + + // if run out of args, endchar is already set to its default + XRANGE_arg++; + if ((second = optional_string(XRANGE, arg)) != OREF_NULL) + { + // must be a single character + if (second->getLength() != 1) + { + reportException(Error_Incorrect_call_pad, "XRANGE", new_integer(XRANGE_arg), second); + } + else + { + // the single character is the end byte + endchar = second->getChar(0); + } + } + + length = 1 + (endchar < startchar ? 256 - startchar + endchar : endchar - startchar); + + // just two args? we can finish this early + if (mode == CALC_LENGTH && argcount <= 2) + { + // create a new string to build the result + result = raw_string(length); + result_builder.init(result); + for (size_t i = 0; i < length; i++) + { + // NOTE: This depends on the fact that we are only inserting the + // least significant byte here, so the wrap situation is handled + // automatically. + result_builder.append(startchar++); + } + return result; + } + else if (mode == CALC_LENGTH) + { + totalLength += length; + } + else // mode == BUILD_STRING + { + for (size_t i = 0; i < length; i++) + { + // NOTE: This depends on the fact that we are only inserting the + // least significant byte here, so the wrap situation is handled + // automatically. 
+ result_builder.append(startchar++); + } + } } - startchar = start->getChar(0); - } - // same rules with the end - if (end != OREF_NULL) - { - if (end->getLength() != 1) + + if (mode == CALC_LENGTH) { - reportException(Error_Incorrect_call_pad, "XRANGE", IntegerTwo, end); + // finished counting length, switch to building string + mode = BUILD_STRING; // step two: build the string + + // create a new string to build the result + result = raw_string(totalLength); + result_builder.init(result); } - endchar = end->getChar(0); } - // calculate the result size...note that XRANGE can wrap if the end precedes the start - size_t length = ((endchar < startchar) ? (256 - startchar) + endchar : (endchar - startchar)) + 1; - - RexxString *result = raw_string(length); - for (size_t i = 0; i < length; i++) - { - // NOTE: This depends on the fact that we are only inserting the - // least significant byte here, so the wrap situation is handled - // automatically. - result->putChar(i, startchar++); - } + // finished building string return result; } Modified: main/trunk/interpreter/messages/RexxErrorCodes.h =================================================================== --- main/trunk/interpreter/messages/RexxErrorCodes.h 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/messages/RexxErrorCodes.h 2017-04-01 21:42:34 UTC (rev 11224) @@ -370,6 +370,7 @@ Error_Incorrect_call_binary = 40024, Error_Incorrect_call_hex = 40025, Error_Incorrect_call_symbol = 40026, +Error_Incorrect_call_pad_or_name = 40028, Error_Incorrect_call_list = 40904, Error_Incorrect_call_trace = 40905, Error_Incorrect_call_random = 40033, Modified: main/trunk/interpreter/messages/RexxMessageNumbers.h =================================================================== --- main/trunk/interpreter/messages/RexxMessageNumbers.h 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/messages/RexxMessageNumbers.h 2017-04-01 21:42:34 UTC (rev 11224) @@ -273,6 +273,7 @@ #define 
Error_Expression_result_trace_msg 331 #define Error_Expression_result_raise_msg 332 #define Error_Logical_value_if_msg 333 +#define Error_Incorrect_call_pad_or_name_msg 334 #define Error_Logical_value_while_msg 335 #define Error_Logical_value_until_msg 336 #define Error_Logical_value_logical_msg 337 Modified: main/trunk/interpreter/messages/RexxMessageTable.h =================================================================== --- main/trunk/interpreter/messages/RexxMessageTable.h 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/messages/RexxMessageTable.h 2017-04-01 21:42:34 UTC (rev 11224) @@ -372,6 +372,7 @@ MINOR(Error_Incorrect_call_binary) MINOR(Error_Incorrect_call_hex) MINOR(Error_Incorrect_call_symbol) + MINOR(Error_Incorrect_call_pad_or_name) MINOR(Error_Incorrect_call_list) MINOR(Error_Incorrect_call_trace) MINOR(Error_Incorrect_call_random) Modified: main/trunk/interpreter/messages/errnums.xml =================================================================== --- main/trunk/interpreter/messages/errnums.xml 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/messages/errnums.xml 2017-04-01 21:42:34 UTC (rev 11224) @@ -2184,6 +2184,13 @@ </listitem> </varlistentry> <varlistentry> +<term>028</term> +<listitem> +<para> +<emphasis role="italic">function_name</emphasis> argument <emphasis role="italic">argument_number</emphasis> must be a character class name or a single character; found "<emphasis role="italic">value</emphasis>".</para> +</listitem> +</varlistentry> +<varlistentry> <term>029</term> <listitem> <para> Modified: main/trunk/interpreter/messages/rexxmsg.xml =================================================================== --- main/trunk/interpreter/messages/rexxmsg.xml 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/messages/rexxmsg.xml 2017-04-01 21:42:34 UTC (rev 11224) @@ -3079,6 +3079,15 @@ </SubMessage> <SubMessage> <Code>40</Code> + <Subcode>028</Subcode> + <MessageNumber>334</MessageNumber> + 
<Component>Rexx</Component> + <Severity>Warning</Severity> + <SymbolicName>Error_Incorrect_call_pad_or_name</SymbolicName> + <Text><Sub position="1" name="function_name"/> argument <Sub position="2" name="argument_number"/> must be a character class name or a single character; found <q><Sub position="3" name="value"/></q>.</Text> + </SubMessage> + <SubMessage> + <Code>40</Code> <Subcode>904</Subcode> <MessageNumber>389</MessageNumber> <Component>Rexx</Component> Modified: main/trunk/interpreter/platform/unix/gencat.inp =================================================================== --- main/trunk/interpreter/platform/unix/gencat.inp 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/platform/unix/gencat.inp 2017-04-01 21:42:34 UTC (rev 11224) @@ -710,6 +710,9 @@ $ Error_Logical_value_if 333 Value of expression following IF keyword must be exactly "0" or "1"; found "&1". +$ Error_Incorrect_call_pad_or_name +334 &1 argument &2 must be a character class name or a single character; found "&3". + $ Error_Logical_value_while 335 Value of expression following WHILE keyword must be exactly "0" or "1"; found "&1". Modified: main/trunk/interpreter/platform/windows/winmsgtb.rc =================================================================== --- main/trunk/interpreter/platform/windows/winmsgtb.rc 2017-03-31 13:29:26 UTC (rev 11223) +++ main/trunk/interpreter/platform/windows/winmsgtb.rc 2017-04-01 21:42:34 UTC (rev 11224) @@ -272,6 +272,7 @@ 33903 "Incorrect expression result following VALUE keyword of TRACE instruction." 33904 "Incorrect expression result following SYNTAX keyword of RAISE instruction." 34001 "Value of expression following IF keyword must be exactly ""0"" or ""1""; found ""&1""." + 40028 "&1 argument &2 must be a character class name or a single character; found ""&3""." 34003 "Value of expression following WHILE keyword must be exactly ""0"" or ""1""; found ""&1""." 
34004 "Value of expression following UNTIL keyword must be exactly ""0"" or ""1""; found ""&1""." 34005 "Value of expression to the left of the logical operator ""&1"" must be exactly ""0"" or ""1""; found ""&2""." |