From: <cg...@us...> - 2006-07-20 00:59:29
|
Revision: 2851 Author: cgroves Date: 2006-07-19 17:59:23 -0700 (Wed, 19 Jul 2006) ViewCVS: http://svn.sourceforge.net/jython/?rev=2851&view=rev Log Message: ----------- updated constants to those from sre_constants.h from Python-2.3.5, reordered methods to follow the order in _sre.c and added SRE_OP_MIN_REPEAT_ONE to match Modified Paths: -------------- branches/2.3/src/org/python/modules/_sre.java branches/2.3/src/org/python/modules/sre/SRE_STATE.java Modified: branches/2.3/src/org/python/modules/_sre.java =================================================================== --- branches/2.3/src/org/python/modules/_sre.java 2006-07-20 00:54:35 UTC (rev 2850) +++ branches/2.3/src/org/python/modules/_sre.java 2006-07-20 00:59:23 UTC (rev 2851) @@ -26,7 +26,7 @@ public class _sre { // update when constants are added or removed - public static int MAGIC = 20030419; + public static int MAGIC = SRE_STATE.SRE_MAGIC; //XXX: May not be the right size, but I suspect it is -- see sre_compile.py public static int CODESIZE = 2; @@ -57,7 +57,7 @@ public static int getcodesize() { - return 2; + return CODESIZE; } Modified: branches/2.3/src/org/python/modules/sre/SRE_STATE.java =================================================================== --- branches/2.3/src/org/python/modules/sre/SRE_STATE.java 2006-07-20 00:54:35 UTC (rev 2850) +++ branches/2.3/src/org/python/modules/sre/SRE_STATE.java 2006-07-20 00:59:23 UTC (rev 2851) @@ -19,167 +19,233 @@ public class SRE_STATE { - /* illegal opcode */ + + /* + * Generated from Python-2.3.5 like 'python headerToJava.py < Modules/sre_constants.h' + * where headerToJava.py contains the following code +import sys +for line in sys.stdin: + if line.startswith('#define'): + line = line.replace('#define', 'public static final int').strip() + segs = line.split(' ') + print '%s = %s;' % (' '.join(segs[:-1]), segs[-1]) + */ + //BEGIN generated code + public static final int SRE_MAGIC = 20030419; + public static final int SRE_OP_FAILURE = 0; + 
public static final int SRE_OP_SUCCESS = 1; + public static final int SRE_OP_ANY = 2; + public static final int SRE_OP_ANY_ALL = 3; + public static final int SRE_OP_ASSERT = 4; + public static final int SRE_OP_ASSERT_NOT = 5; + public static final int SRE_OP_AT = 6; + public static final int SRE_OP_BRANCH = 7; + public static final int SRE_OP_CALL = 8; + public static final int SRE_OP_CATEGORY = 9; + public static final int SRE_OP_CHARSET = 10; + public static final int SRE_OP_BIGCHARSET = 11; + public static final int SRE_OP_GROUPREF = 12; + public static final int SRE_OP_GROUPREF_IGNORE = 13; + public static final int SRE_OP_IN = 14; + public static final int SRE_OP_IN_IGNORE = 15; + public static final int SRE_OP_INFO = 16; + public static final int SRE_OP_JUMP = 17; + public static final int SRE_OP_LITERAL = 18; + public static final int SRE_OP_LITERAL_IGNORE = 19; + public static final int SRE_OP_MARK = 20; + public static final int SRE_OP_MAX_UNTIL = 21; + public static final int SRE_OP_MIN_UNTIL = 22; + public static final int SRE_OP_NOT_LITERAL = 23; + public static final int SRE_OP_NOT_LITERAL_IGNORE = 24; + public static final int SRE_OP_NEGATE = 25; + public static final int SRE_OP_RANGE = 26; + public static final int SRE_OP_REPEAT = 27; + public static final int SRE_OP_REPEAT_ONE = 28; + public static final int SRE_OP_SUBPATTERN = 29; + public static final int SRE_OP_MIN_REPEAT_ONE = 30; + public static final int SRE_AT_BEGINNING = 0; + public static final int SRE_AT_BEGINNING_LINE = 1; + public static final int SRE_AT_BEGINNING_STRING = 2; + public static final int SRE_AT_BOUNDARY = 3; + public static final int SRE_AT_NON_BOUNDARY = 4; + public static final int SRE_AT_END = 5; + public static final int SRE_AT_END_LINE = 6; + public static final int SRE_AT_END_STRING = 7; + public static final int SRE_AT_LOC_BOUNDARY = 8; + public static final int SRE_AT_LOC_NON_BOUNDARY = 9; + public static final int SRE_AT_UNI_BOUNDARY = 10; + public static final int 
SRE_AT_UNI_NON_BOUNDARY = 11; + public static final int SRE_CATEGORY_DIGIT = 0; + public static final int SRE_CATEGORY_NOT_DIGIT = 1; + public static final int SRE_CATEGORY_SPACE = 2; + public static final int SRE_CATEGORY_NOT_SPACE = 3; + public static final int SRE_CATEGORY_WORD = 4; + public static final int SRE_CATEGORY_NOT_WORD = 5; + public static final int SRE_CATEGORY_LINEBREAK = 6; + public static final int SRE_CATEGORY_NOT_LINEBREAK = 7; + public static final int SRE_CATEGORY_LOC_WORD = 8; + public static final int SRE_CATEGORY_LOC_NOT_WORD = 9; + public static final int SRE_CATEGORY_UNI_DIGIT = 10; + public static final int SRE_CATEGORY_UNI_NOT_DIGIT = 11; + public static final int SRE_CATEGORY_UNI_SPACE = 12; + public static final int SRE_CATEGORY_UNI_NOT_SPACE = 13; + public static final int SRE_CATEGORY_UNI_WORD = 14; + public static final int SRE_CATEGORY_UNI_NOT_WORD = 15; + public static final int SRE_CATEGORY_UNI_LINEBREAK = 16; + public static final int SRE_CATEGORY_UNI_NOT_LINEBREAK = 17; + public static final int SRE_FLAG_TEMPLATE = 1; + public static final int SRE_FLAG_IGNORECASE = 2; + public static final int SRE_FLAG_LOCALE = 4; + public static final int SRE_FLAG_MULTILINE = 8; + public static final int SRE_FLAG_DOTALL = 16; + public static final int SRE_FLAG_UNICODE = 32; + public static final int SRE_FLAG_VERBOSE = 64; + public static final int SRE_INFO_PREFIX = 1; + public static final int SRE_INFO_LITERAL = 2; + public static final int SRE_INFO_CHARSET = 4; + //END generated code + + //From here we're including things from _sre.c in the order they're defined there + public static final int USE_RECURSION_LIMIT = 5000; + + /* error codes */ public static final int SRE_ERROR_ILLEGAL = -1; - - /* illegal state */ public static final int SRE_ERROR_STATE = -2; - - /* runaway recursion */ public static final int SRE_ERROR_RECURSION_LIMIT = -3; - public static final int SRE_OP_FAILURE = 0; - public static final int SRE_OP_SUCCESS = 1; - public 
static final int SRE_OP_ANY = 2; - public static final int SRE_OP_ANY_ALL = 3; - public static final int SRE_OP_ASSERT = 4; - public static final int SRE_OP_ASSERT_NOT = 5; - public static final int SRE_OP_AT = 6; - public static final int SRE_OP_BRANCH = 7; - public static final int SRE_OP_CALL = 8; - public static final int SRE_OP_CATEGORY = 9; - public static final int SRE_OP_CHARSET = 10; - public static final int SRE_OP_BIGCHARSET = 11; - public static final int SRE_OP_GROUPREF = 12; - public static final int SRE_OP_GROUPREF_IGNORE = 13; - public static final int SRE_OP_IN = 14; - public static final int SRE_OP_IN_IGNORE = 15; - public static final int SRE_OP_INFO = 16; - public static final int SRE_OP_JUMP = 17; - public static final int SRE_OP_LITERAL = 18; - public static final int SRE_OP_LITERAL_IGNORE = 19; - public static final int SRE_OP_MARK = 20; - public static final int SRE_OP_MAX_UNTIL = 21; - public static final int SRE_OP_MIN_UNTIL = 22; - public static final int SRE_OP_NOT_LITERAL = 23; - public static final int SRE_OP_NOT_LITERAL_IGNORE = 24; - public static final int SRE_OP_NEGATE = 25; - public static final int SRE_OP_RANGE = 26; - public static final int SRE_OP_REPEAT = 27; - public static final int SRE_OP_REPEAT_ONE = 28; - public static final int SRE_OP_SUBPATTERN = 29; + /* default character predicates (run sre_chars.py to regenerate tables) */ + static final int SRE_DIGIT_MASK = 1; + static final int SRE_SPACE_MASK = 2; + static final int SRE_LINEBREAK_MASK = 4; + static final int SRE_ALNUM_MASK = 8; + static final int SRE_WORD_MASK = 16; - public static final int SRE_AT_BEGINNING = 0; - public static final int SRE_AT_BEGINNING_LINE = 1; - public static final int SRE_AT_BEGINNING_STRING = 2; - public static final int SRE_AT_BOUNDARY = 3; - public static final int SRE_AT_NON_BOUNDARY = 4; - public static final int SRE_AT_END = 5; - public static final int SRE_AT_END_LINE = 6; - public static final int SRE_AT_END_STRING = 7; - public 
static final int SRE_AT_LOC_BOUNDARY = 8; - public static final int SRE_AT_LOC_NON_BOUNDARY = 9; - public static final int SRE_AT_UNI_BOUNDARY = 10; - public static final int SRE_AT_UNI_NON_BOUNDARY = 11; + static byte[] sre_char_info = new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, + 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, + 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; - public static final int SRE_CATEGORY_DIGIT = 0; - public static final int SRE_CATEGORY_NOT_DIGIT = 1; - public static final int SRE_CATEGORY_SPACE = 2; - public static final int SRE_CATEGORY_NOT_SPACE = 3; - public static final int SRE_CATEGORY_WORD = 4; - public static final int SRE_CATEGORY_NOT_WORD = 5; - public static final int SRE_CATEGORY_LINEBREAK = 6; - public static final int SRE_CATEGORY_NOT_LINEBREAK = 7; - public static final int SRE_CATEGORY_LOC_WORD = 8; - public static final int SRE_CATEGORY_LOC_NOT_WORD = 9; - public static final int SRE_CATEGORY_UNI_DIGIT = 10; - public static final int SRE_CATEGORY_UNI_NOT_DIGIT = 11; - public static final int SRE_CATEGORY_UNI_SPACE = 12; - public static final int SRE_CATEGORY_UNI_NOT_SPACE = 13; - public static final int SRE_CATEGORY_UNI_WORD = 14; - public static final int SRE_CATEGORY_UNI_NOT_WORD = 15; - public static final int SRE_CATEGORY_UNI_LINEBREAK = 16; - public static final int SRE_CATEGORY_UNI_NOT_LINEBREAK = 17; + static byte[] sre_char_lower = new byte[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, + 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127 }; - public static final int SRE_FLAG_TEMPLATE = 1; - public static final int SRE_FLAG_IGNORECASE = 2; - public static final int SRE_FLAG_LOCALE = 4; - public static final int SRE_FLAG_MULTILINE = 8; - public static final int SRE_FLAG_DOTALL = 16; - public static final int SRE_FLAG_UNICODE = 32; - public static final int SRE_FLAG_VERBOSE = 64; + final boolean SRE_IS_DIGIT(char ch) { + return ((ch) < 128 ? + (sre_char_info[(ch)] & SRE_DIGIT_MASK) != 0 : false); + } - public static final int SRE_INFO_PREFIX = 1; - public static final int SRE_INFO_LITERAL = 2; - public static final int SRE_INFO_CHARSET = 4; + final boolean SRE_IS_SPACE(char ch) { + return ((ch) < 128 ? + (sre_char_info[(ch)] & SRE_SPACE_MASK) != 0 : false); + } + final boolean SRE_IS_LINEBREAK(char ch) { + //TODO why is this different than _sre.c + return ch == '\n'; + } - public static final int USE_RECURSION_LIMIT = 2000; + final boolean SRE_IS_WORD(char ch) { + return ((ch) < 128 ? + (sre_char_info[(ch)] & SRE_WORD_MASK) != 0 : false); + } + final char lower(char ch) { + if ((flags & SRE_FLAG_LOCALE) != 0) + return ((ch) < 256 ? Character.toLowerCase(ch) : ch); + if ((flags & SRE_FLAG_UNICODE) != 0) + return Character.toLowerCase(ch); + return ((ch) < 128 ? 
(char)sre_char_lower[ch] : ch); + } - /* string pointers */ - int ptr; /* current position (also end of current slice) */ - int beginning; /* start of original string */ - int start; /* start of current slice */ - int end; /* end of original string */ + final boolean SRE_LOC_IS_WORD(char ch) { + return Character.isLetterOrDigit(ch) || ch == '_'; + } - /* attributes for the match object */ - char[] str; - int pos; - int endpos; + final boolean SRE_UNI_IS_LINEBREAK(char ch) { + switch (ch) { + case 0x000A: /* LINE FEED */ + case 0x000D: /* CARRIAGE RETURN */ + case 0x001C: /* FILE SEPARATOR */ + case 0x001D: /* GROUP SEPARATOR */ + case 0x001E: /* RECORD SEPARATOR */ + case 0x0085: /* NEXT LINE */ + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ + return true; + default: + return false; + } + } + + final boolean sre_category(char category, char ch) { + switch (category) { - /* character size */ - int charsize; + case SRE_CATEGORY_DIGIT: + return SRE_IS_DIGIT(ch); + case SRE_CATEGORY_NOT_DIGIT: + return ! SRE_IS_DIGIT(ch); - /* registers */ - int lastindex; - int lastmark; + case SRE_CATEGORY_SPACE: + return SRE_IS_SPACE(ch); + case SRE_CATEGORY_NOT_SPACE: + return ! SRE_IS_SPACE(ch); - /* FIXME: <fl> should be dynamically allocated! */ - int[] mark = new int[200]; + case SRE_CATEGORY_WORD: + return SRE_IS_WORD(ch); + case SRE_CATEGORY_NOT_WORD: + return ! SRE_IS_WORD(ch); - /* dynamically allocated stuff */ - int[] mark_stack; - int mark_stack_size; - int mark_stack_base; + case SRE_CATEGORY_LINEBREAK: + return SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_NOT_LINEBREAK: + return ! SRE_IS_LINEBREAK(ch); - SRE_REPEAT repeat; /* current repeat context */ + case SRE_CATEGORY_LOC_WORD: + return SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_LOC_NOT_WORD: + return ! 
SRE_LOC_IS_WORD(ch); - /* debugging */ - int maxlevel; - /* duplicated from the PatternObject */ - int flags; + case SRE_CATEGORY_UNI_DIGIT: + return Character.isDigit(ch); + case SRE_CATEGORY_UNI_NOT_DIGIT: + return !Character.isDigit(ch); + case SRE_CATEGORY_UNI_SPACE: + return Character.isWhitespace(ch); + case SRE_CATEGORY_UNI_NOT_SPACE: + return !Character.isWhitespace(ch); + case SRE_CATEGORY_UNI_WORD: + return Character.isLetterOrDigit(ch) || ch == '_'; + case SRE_CATEGORY_UNI_NOT_WORD: + return ! (Character.isLetterOrDigit(ch) || ch == '_'); + case SRE_CATEGORY_UNI_LINEBREAK: + return SRE_UNI_IS_LINEBREAK(ch); + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + return ! SRE_UNI_IS_LINEBREAK(ch); - public SRE_STATE(String str, int start, int end, int flags) { - this.str = str.toCharArray(); - int size = str.length(); - - this.charsize = 1; - - /* adjust boundaries */ - if (start < 0) - start = 0; - else if (start > size) - start = size; - - if (end < 0) - end = 0; - else if (end > size) - end = size; - - this.start = start; - this.end = end; - - this.pos = start; - this.endpos = end; - - state_reset(); - - this.flags = flags; + } + return false; } - - private void mark_fini() { mark_stack = null; mark_stack_size = mark_stack_base = 0; } - private void mark_save(int lo, int hi) { if (hi <= lo) return; @@ -231,12 +297,9 @@ System.arraycopy(mark_stack, mark_stack_base, mark, lo, size); } - - - + final boolean SRE_AT(int ptr, char at) { - /* check if pointer is at given position. return 1 if so, 0 - otherwise */ + /* check if pointer is at given position. */ boolean thiS, that; @@ -294,12 +357,8 @@ return false; } - - - final boolean SRE_CHARSET(char[] set, int setidx, char ch) { - /* check if character is a member of the given set. return 1 if - so, 0 otherwise */ + /* check if character is a member of the given set. 
*/ boolean ok = true; @@ -368,10 +427,7 @@ } } } - - - - + private int SRE_COUNT(char[] pattern, int pidx, int maxcount, int level) { char chr; int ptr = this.ptr; @@ -453,8 +509,6 @@ return ptr - this.ptr; } - - final int SRE_MATCH(char[] pattern, int pidx, int level) { /* check if string matches the given pattern. returns <0 for error, 0 for failure, and 1 for success */ @@ -771,8 +825,73 @@ } } return 0; + + case SRE_OP_MIN_REPEAT_ONE: + /* match repeated sequence (minimizing regexp) */ + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. for other cases, + use the MIN_REPEAT operator */ + /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ + + mincount = pattern[pidx+1]; + + //TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, + // pattern[1], pattern[2])); + + if (ptr + mincount > end) + return 0; /* cannot match */ + + this.ptr = ptr; + + if (pattern[pidx+1] == 0) + count = 0; + else { + + count = SRE_COUNT(pattern, pidx + 3, pattern[pidx+1], + level + 1); + + if (count < 0) + return count; /* exception */ + if (count < (int) pattern[1]) + return 0; /* did not match minimum number of times */ + ptr += count; /* advance past minimum matches of repeat */ + } + + if (pattern[pidx + pattern[pidx]] == SRE_OP_SUCCESS) { + /* tail is empty. 
we're finished */ + this.ptr = ptr; + return 1; + + } else { + /* general case */ + boolean matchmax = ((int)pattern[pidx + 2] == 65535); + int c; + lastmark = this.lastmark; + while (matchmax || count <= (int) pattern[pidx + 2]) { + this.ptr = ptr; + i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); + if (i != 0) + return i; + this.ptr = ptr; + c = SRE_COUNT(pattern, pidx+3, 1, level+1); + if (c < 0) + return c; + if (c == 0) + break; + if(c != 1){ + throw new IllegalStateException("c should be 1!"); + } + ptr++; + count++; + while (this.lastmark > lastmark) + mark[this.lastmark--] = -1; + } + } + return 0; + case SRE_OP_REPEAT: /* create repeat context. all the hard work is done by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ @@ -900,17 +1019,13 @@ default: - //TRACE(pidx, ptr, "UNKNOWN " + (int) pattern[pidx-1]); + TRACE(pidx, ptr, "UNKNOWN " + (int) pattern[pidx-1]); return SRE_ERROR_ILLEGAL; } } //return SRE_ERROR_ILLEGAL; } - - - - int SRE_SEARCH(char[] pattern, int pidx) { int ptr = this.start; int end = this.end; @@ -1040,137 +1155,71 @@ } - final boolean sre_category(char category, char ch) { - switch (category) { + /* string pointers */ + int ptr; /* current position (also end of current slice) */ + int beginning; /* start of original string */ + int start; /* start of current slice */ + int end; /* end of original string */ - case SRE_CATEGORY_DIGIT: - return SRE_IS_DIGIT(ch); - case SRE_CATEGORY_NOT_DIGIT: - return ! SRE_IS_DIGIT(ch); + /* attributes for the match object */ + char[] str; + int pos; + int endpos; - case SRE_CATEGORY_SPACE: - return SRE_IS_SPACE(ch); - case SRE_CATEGORY_NOT_SPACE: - return ! SRE_IS_SPACE(ch); + /* character size */ + int charsize; - case SRE_CATEGORY_WORD: - return SRE_IS_WORD(ch); - case SRE_CATEGORY_NOT_WORD: - return ! SRE_IS_WORD(ch); + /* registers */ + int lastindex; + int lastmark; - case SRE_CATEGORY_LINEBREAK: - return SRE_IS_LINEBREAK(ch); - case SRE_CATEGORY_NOT_LINEBREAK: - return ! 
SRE_IS_LINEBREAK(ch); + /* FIXME: <fl> should be dynamically allocated! */ + int[] mark = new int[200]; - case SRE_CATEGORY_LOC_WORD: - return SRE_LOC_IS_WORD(ch); - case SRE_CATEGORY_LOC_NOT_WORD: - return ! SRE_LOC_IS_WORD(ch); + /* dynamically allocated stuff */ + int[] mark_stack; + int mark_stack_size; + int mark_stack_base; + SRE_REPEAT repeat; /* current repeat context */ - case SRE_CATEGORY_UNI_DIGIT: - return Character.isDigit(ch); - case SRE_CATEGORY_UNI_NOT_DIGIT: - return !Character.isDigit(ch); + /* debugging */ + int maxlevel; - case SRE_CATEGORY_UNI_SPACE: - return Character.isWhitespace(ch); - case SRE_CATEGORY_UNI_NOT_SPACE: - return !Character.isWhitespace(ch); + /* duplicated from the PatternObject */ + int flags; - case SRE_CATEGORY_UNI_WORD: - return Character.isLetterOrDigit(ch) || ch == '_'; - case SRE_CATEGORY_UNI_NOT_WORD: - return ! (Character.isLetterOrDigit(ch) || ch == '_'); - case SRE_CATEGORY_UNI_LINEBREAK: - return SRE_UNI_IS_LINEBREAK(ch); - case SRE_CATEGORY_UNI_NOT_LINEBREAK: - return ! 
SRE_UNI_IS_LINEBREAK(ch); - } - return false; - } + public SRE_STATE(String str, int start, int end, int flags) { + this.str = str.toCharArray(); + int size = str.length(); - /* default character predicates (run sre_chars.py to regenerate tables) */ + this.charsize = 1; - static final int SRE_DIGIT_MASK = 1; - static final int SRE_SPACE_MASK = 2; - static final int SRE_LINEBREAK_MASK = 4; - static final int SRE_ALNUM_MASK = 8; - static final int SRE_WORD_MASK = 16; + /* adjust boundaries */ + if (start < 0) + start = 0; + else if (start > size) + start = size; - static byte[] sre_char_info = new byte[] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, - 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, - 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; + if (end < 0) + end = 0; + else if (end > size) + end = size; - static byte[] sre_char_lower = new byte[] { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, - 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127 }; + this.start = start; + this.end = end; - final boolean SRE_IS_DIGIT(char ch) { - return ((ch) < 128 ? 
- (sre_char_info[(ch)] & SRE_DIGIT_MASK) != 0 : false); - } + this.pos = start; + this.endpos = end; - final boolean SRE_IS_SPACE(char ch) { - return ((ch) < 128 ? - (sre_char_info[(ch)] & SRE_SPACE_MASK) != 0 : false); - } + state_reset(); - final boolean SRE_IS_WORD(char ch) { - return ((ch) < 128 ? - (sre_char_info[(ch)] & SRE_WORD_MASK) != 0 : false); + this.flags = flags; } - final boolean SRE_IS_LINEBREAK(char ch) { - return ch == '\n'; - } - - final boolean SRE_LOC_IS_WORD(char ch) { - return Character.isLetterOrDigit(ch) || ch == '_'; - } - - final boolean SRE_UNI_IS_LINEBREAK(char ch) { - switch (ch) { - case 0x000A: /* LINE FEED */ - case 0x000D: /* CARRIAGE RETURN */ - case 0x001C: /* FILE SEPARATOR */ - case 0x001D: /* GROUP SEPARATOR */ - case 0x001E: /* RECORD SEPARATOR */ - case 0x0085: /* NEXT LINE */ - case 0x2028: /* LINE SEPARATOR */ - case 0x2029: /* PARAGRAPH SEPARATOR */ - return true; - default: - return false; - } - } - - - final char lower(char ch) { - if ((flags & SRE_FLAG_LOCALE) != 0) - return ((ch) < 256 ? Character.toLowerCase(ch) : ch); - if ((flags & SRE_FLAG_UNICODE) != 0) - return Character.toLowerCase(ch); - return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); - } - - public static int getlower(int ch, int flags) { if ((flags & SRE_FLAG_LOCALE) != 0) return ((ch) < 256 ? Character.toLowerCase((char) ch) : ch); @@ -1179,10 +1228,6 @@ return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); } - - - - String getslice(int index, String string, boolean empty) { int i, j; @@ -1203,9 +1248,6 @@ return string.substring(i, j); } - - - void state_reset() { lastmark = 0; @@ -1219,8 +1261,7 @@ mark_fini(); } - private void TRACE(int pidx, int ptr, String string) { - //System.out.println(" |" + pidx + "|" + ptr + ": " + string); + System.out.println(" |" + pidx + "|" + ptr + ": " + string); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cg...@us...> - 2006-07-20 12:39:50
|
Revision: 2852 Author: cgroves Date: 2006-07-20 05:39:44 -0700 (Thu, 20 Jul 2006) ViewCVS: http://svn.sourceforge.net/jython/?rev=2852&view=rev Log Message: ----------- SRE_STATE.java's MATCH, SEARCH, and COUNT are up to the versions in Python 2.3.5's _sre.c Added regs, lastgroup and expand to PatternObject and pattern to ScannerObject. Modified Paths: -------------- branches/2.3/src/org/python/modules/_sre.java branches/2.3/src/org/python/modules/sre/MatchObject.java branches/2.3/src/org/python/modules/sre/SRE_STATE.java branches/2.3/src/org/python/modules/sre/ScannerObject.java Modified: branches/2.3/src/org/python/modules/_sre.java =================================================================== --- branches/2.3/src/org/python/modules/_sre.java 2006-07-20 00:59:23 UTC (rev 2851) +++ branches/2.3/src/org/python/modules/_sre.java 2006-07-20 12:39:44 UTC (rev 2852) @@ -25,7 +25,6 @@ public class _sre { - // update when constants are added or removed public static int MAGIC = SRE_STATE.SRE_MAGIC; //XXX: May not be the right size, but I suspect it is -- see sre_compile.py Modified: branches/2.3/src/org/python/modules/sre/MatchObject.java =================================================================== --- branches/2.3/src/org/python/modules/sre/MatchObject.java 2006-07-20 00:59:23 UTC (rev 2851) +++ branches/2.3/src/org/python/modules/sre/MatchObject.java 2006-07-20 12:39:44 UTC (rev 2852) @@ -27,6 +27,15 @@ int groups; /* number of groups (start/end marks) */ int[] mark; + + public PyObject expand(PyObject[] args) { + if(args.length == 0) { + throw Py.TypeError("expand() takes exactly 1 argument (0 given)"); + } + PyObject mod = imp.importName("sre", true); + PyObject func = mod.__getattr__("_expand"); + return func.__call__(new PyObject[] {pattern, this, args[0]}); + } public PyObject group(PyObject[] args) { switch (args.length) { @@ -182,6 +191,14 @@ return Py.newInteger(endpos); if (key == "lastindex") return lastindex == -1 ? 
Py.None : Py.newInteger(lastindex); + if (key == "lastgroup"){ + if(pattern.indexgroup != null && lastindex >= 0) + return pattern.indexgroup.__getitem__(lastindex); + return Py.None; + } + if ( key == "regs" ){ + return regs(); + } return super.__findattr__(key); } } Modified: branches/2.3/src/org/python/modules/sre/SRE_STATE.java =================================================================== --- branches/2.3/src/org/python/modules/sre/SRE_STATE.java 2006-07-20 00:59:23 UTC (rev 2851) +++ branches/2.3/src/org/python/modules/sre/SRE_STATE.java 2006-07-20 12:39:44 UTC (rev 2852) @@ -246,9 +246,9 @@ mark_stack_size = mark_stack_base = 0; } - private void mark_save(int lo, int hi) { + private int mark_save(int lo, int hi) { if (hi <= lo) - return; + return mark_stack_base; int size = (hi - lo) + 1; @@ -282,20 +282,21 @@ System.arraycopy(mark, lo, mark_stack, mark_stack_base, size); mark_stack_base += size; + return mark_stack_base; } - - private void mark_restore(int lo, int hi) { + private void mark_restore(int lo, int hi, int mark_stack_base) { + if (hi <= lo) return; int size = (hi - lo) + 1; - mark_stack_base -= size; + this.mark_stack_base = mark_stack_base - size; //TRACE(0, ptr, "copy " + lo + ":" + hi + " from " + mark_stack_base); - System.arraycopy(mark_stack, mark_stack_base, mark, lo, size); + System.arraycopy(mark_stack, this.mark_stack_base, mark, lo, size); } final boolean SRE_AT(int ptr, char at) { @@ -518,7 +519,7 @@ int i, count; char chr; - int lastmark; + int lastmark, lastindex, mark_stack_base = 0; //TRACE(pidx, ptr, "ENTER " + level); @@ -684,7 +685,7 @@ //TRACE(pidx, ptr, "MARK " + (int) pattern[pidx]); i = pattern[pidx]; if ((i & 1) != 0) - lastindex = i / 2 + 1; + this.lastindex = i / 2 + 1; if (i > this.lastmark) this.lastmark = i; mark[i] = ptr; @@ -730,26 +731,33 @@ case SRE_OP_BRANCH: /* try an alternate branch */ - /* <BRANCH> <0=skip> code <JUMP> ... 
<NULL> */ - //TRACE(pidx, ptr, "BRANCH"); - lastmark = this.lastmark; - for (; pattern[pidx] != 0; pidx += pattern[pidx]) { - if (pattern[pidx+1] == SRE_OP_LITERAL && - (ptr >= end || str[ptr] != pattern[pidx+2])) - continue; - if (pattern[pidx+1] == SRE_OP_IN && (ptr >= end || - !SRE_CHARSET(pattern, pidx + 3, str[ptr]))) - continue; - this.ptr = ptr; - i = SRE_MATCH(pattern, pidx + 1, level + 1); - if (i != 0) - return i; - while (this.lastmark > lastmark) - mark[this.lastmark--] = -1; - } + /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ + // TRACE(pidx, ptr, "BRANCH"); + lastmark = this.lastmark; + lastindex = this.lastindex; + if(this.repeat != null) { + mark_stack_base = mark_save(0, lastmark); + } + for(; pattern[pidx] != 0; pidx += pattern[pidx]) { + if(pattern[pidx + 1] == SRE_OP_LITERAL + && (ptr >= end || str[ptr] != pattern[pidx + 2])) + continue; + if(pattern[pidx + 1] == SRE_OP_IN + && (ptr >= end || !SRE_CHARSET(pattern, + pidx + 3, + str[ptr]))) + continue; + this.ptr = ptr; + i = SRE_MATCH(pattern, pidx + 1, level + 1); + if(i != 0) + return i; + if(this.repeat != null) { + mark_restore(0, lastmark, mark_stack_base); + } + LASTMARK_RESTORE(lastmark, lastindex); + } + return 0; - return 0; - case SRE_OP_REPEAT_ONE: /* match repeated sequence (maximizing regexp) */ @@ -787,8 +795,11 @@ /* tail is empty. we're finished */ this.ptr = ptr; return 1; - - } else if (pattern[pidx + pattern[pidx]] == SRE_OP_LITERAL) { + } + lastmark = this.lastmark; + lastindex = this.lastindex; + + if (pattern[pidx + pattern[pidx]] == SRE_OP_LITERAL) { /* tail starts with a literal. 
skip positions where the rest of the pattern cannot possibly match */ chr = pattern[pidx + pattern[pidx]+1]; @@ -807,6 +818,7 @@ return 1; ptr--; count--; + LASTMARK_RESTORE(lastmark, lastindex); } } else { @@ -820,8 +832,7 @@ return i; ptr--; count--; - while (this.lastmark > lastmark) - mark[this.lastmark--] = -1; + LASTMARK_RESTORE(lastmark, lastindex); } } return 0; @@ -838,8 +849,7 @@ mincount = pattern[pidx+1]; - //TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, - // pattern[1], pattern[2])); + //TRACE(pidx, ptr, "MIN_REPEAT_ONE"); if (ptr + mincount > end) return 0; /* cannot match */ @@ -870,6 +880,7 @@ boolean matchmax = ((int)pattern[pidx + 2] == 65535); int c; lastmark = this.lastmark; + lastindex = this.lastindex; while (matchmax || count <= (int) pattern[pidx + 2]) { this.ptr = ptr; i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); @@ -886,8 +897,7 @@ } ptr++; count++; - while (this.lastmark > lastmark) - mark[this.lastmark--] = -1; + LASTMARK_RESTORE(lastmark, lastindex); } } return 0; @@ -947,13 +957,14 @@ match another item, do so */ rp.count = count; lastmark = this.lastmark; - mark_save(0, lastmark); + lastindex = this.lastindex; + mark_stack_base = mark_save(0, lastmark); /* RECURSIVE */ i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); if (i != 0) return i; - mark_restore(0, lastmark); - this.lastmark = lastmark; + mark_restore(0, lastmark, mark_stack_base); + LASTMARK_RESTORE(lastmark, lastindex); rp.count = count - 1; this.ptr = ptr; } @@ -977,12 +988,12 @@ if (rp == null) return SRE_ERROR_STATE; + this.ptr = ptr; + count = rp.count + 1; - //TRACE(pidx, ptr, "MIN_UNTIL " + count + " " + rp.pidx); + //TRACE(pidx, ptr, "MIN_UNTIL " + count + " " + rp.pidx); - this.ptr = ptr; - if (count < pattern[rp.pidx + 1]) { /* not enough matches */ rp.count = count; @@ -994,6 +1005,9 @@ this.ptr = ptr; return 0; } + + lastmark = this.lastmark; + lastindex = this.lastindex; /* see if the tail matches */ this.repeat = rp.prev; @@ -1007,6 +1021,7 @@ 
if (count >= pattern[rp.pidx+2] && pattern[rp.pidx+2] != 65535) return 0; + LASTMARK_RESTORE(lastmark, lastindex); rp.count = count; /* RECURSIVE */ @@ -1019,13 +1034,23 @@ default: - TRACE(pidx, ptr, "UNKNOWN " + (int) pattern[pidx-1]); + //TRACE(pidx, ptr, "UNKNOWN " + (int) pattern[pidx-1]); return SRE_ERROR_ILLEGAL; } } - //return SRE_ERROR_ILLEGAL; + /* can't end up here */ + /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */ } + + private void LASTMARK_RESTORE(int lastmark, int lastindex) { + if(this.lastmark > lastmark) { + while(this.lastmark > lastmark) + mark[this.lastmark--] = -1; + this.lastindex = lastindex; + } + } + int SRE_SEARCH(char[] pattern, int pidx) { int ptr = this.start; int end = this.end; @@ -1043,7 +1068,7 @@ flags = pattern[pidx+2]; - if (pattern[pidx+3] > 0) { + if (pattern[pidx+3] > 1) { /* adjust end point (but make sure we leave at least one character in there, so literal search will work) */ end -= pattern[pidx+3]-1; Modified: branches/2.3/src/org/python/modules/sre/ScannerObject.java =================================================================== --- branches/2.3/src/org/python/modules/sre/ScannerObject.java 2006-07-20 00:59:23 UTC (rev 2851) +++ branches/2.3/src/org/python/modules/sre/ScannerObject.java 2006-07-20 12:39:44 UTC (rev 2852) @@ -18,7 +18,7 @@ import org.python.core.*; public class ScannerObject extends PyObject { - PatternObject pattern; + public PatternObject pattern; String string; SRE_STATE state; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <cg...@us...> - 2007-02-15 21:12:11
|
Revision: 3106 http://svn.sourceforge.net/jython/?rev=3106&view=rev Author: cgroves Date: 2007-02-15 13:12:08 -0800 (Thu, 15 Feb 2007) Log Message: ----------- h_eriksson's itertools module from patch #1608656. I removed the unused toInt method that was included in the patch. Modified Paths: -------------- branches/2.3/src/org/python/modules/Setup.java Added Paths: ----------- branches/2.3/src/org/python/modules/itertools.java Modified: branches/2.3/src/org/python/modules/Setup.java =================================================================== --- branches/2.3/src/org/python/modules/Setup.java 2007-02-13 06:27:23 UTC (rev 3105) +++ branches/2.3/src/org/python/modules/Setup.java 2007-02-15 21:12:08 UTC (rev 3106) @@ -54,6 +54,7 @@ "array:org.python.modules.ArrayModule", "sets:org.python.modules.sets.Sets", "_random:org.python.modules.random.RandomModule", - "cmath" + "cmath", + "itertools" }; } Added: branches/2.3/src/org/python/modules/itertools.java =================================================================== --- branches/2.3/src/org/python/modules/itertools.java (rev 0) +++ branches/2.3/src/org/python/modules/itertools.java 2007-02-15 21:12:08 UTC (rev 3106) @@ -0,0 +1,648 @@ +package org.python.modules; + +import java.util.ArrayList; +import java.util.List; +import org.python.core.ClassDictInit; +import org.python.core.Py; +import org.python.core.PyException; +import org.python.core.PyInteger; +import org.python.core.PyIterator; +import org.python.core.PyNone; +import org.python.core.PyObject; +import org.python.core.PyString; +import org.python.core.PyTuple; + +/** + * Functional tools for creating and using iterators. Java implementation of the + * CPython module itertools. 
+ * + * @author Henrik Eriksson <hen...@gm...> + * @since 2.3 + */ +public class itertools implements ClassDictInit { + + public static PyString __doc__ = new PyString( + "Functional tools for creating and using iterators.\n\nInfinite iterators:\n" + + "count([n]) --> n, n+1, n+2, ...\ncycle(p) --> p0, p1, ... plast, p0, p1, ...\n" + + "repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\n" + + "Iterators terminating on the shortest input sequence:" + + "\nizip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n" + + "ifilter(pred, seq) --> elements of seq where pred(elem) is True\n" + + "ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n" + + "islice(seq, [start,] stop [, step]) --> elements from\n seq[start:stop:step]\n" + + "imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n" + + "starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n" + + "chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n" + + "takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n" + + "dropwhile(pred, seq) --> seq[n],seq[n+1], starting when pred fails\n"); + + /** + * Iterator base class used by most methods. + */ + static abstract class ItertoolsIterator extends PyIterator { + + /** + * Returns the next element from an iterator. If it raises/throws + * StopIteration just store the Exception and return null according to + * PyIterator practice. 
+ * + * @param pyIter + * @return + */ + protected PyObject nextElement(PyObject pyIter) { + PyObject element = null; + try { + element = pyIter.__iternext__();//next(); + } catch (PyException pyEx) { + if (Py.matchException(pyEx, Py.StopIteration)) { + // store exception - will be used by PyIterator.next() + stopException = pyEx; + } else { + throw pyEx; + } + } + return element; + } + } + + public static void classDictInit(PyObject dict) { + } + + + public static PyString __doc__count = new PyString( + "count([firstval]) --> count object\n\nReturn a count object whose .next() " + + "method returns consecutive\nintegers starting from zero or, if specified, from firstval."); + + /** + * Creates an iterator that returns consecutive integers starting at + * <code>init</code>. + * + * @param init + * @return + */ + public static PyIterator count(final int init) { + return new PyIterator() { + int counter = init; + + public PyObject __iternext__() { + return new PyInteger(counter++); + } + + }; + } + + /** + * Creates an iterator that returns consecutive integers starting at 0. + * + * @return + */ + public static PyIterator count() { + return itertools.count(0); + } + + public static PyString __doc__cycle = new PyString( + "cycle(iterable) --> cycle object\n\nReturn elements from the iterable " + + "until itis exhausted.\nThen repeat the sequence indefinitely."); + + /** + * Returns an iterator that iterates over an iterable, saving the values for + * each iteration. When the iterable is exhausted continues to iterate over + * the saved values indefinitely. 
+ * + * @param sequence + * @return + */ + public static PyIterator cycle(final PyObject sequence) { + return new ItertoolsIterator() { + List saved = new ArrayList(); + int counter = 0; + PyObject iter = sequence.__iter__(); + + boolean save = true; + + public PyObject __iternext__() { + + if(save) { + PyObject obj = nextElement(iter); + if (obj != null) { + saved.add(obj); + return obj; + } else { + save = false; + } + } + if(saved.size() == 0) { + return null; + } + + // pick element from saved List + if(counter >= saved.size()) { + // start over again + counter = 0; + } + return (PyObject) saved.get(counter++); + } + + }; + } + + + + public static PyString __doc__chain = new PyString( + "chain(*iterables) --> chain object\n\nReturn a chain object " + + "whose .next() method returns elements from the\nfirst iterable until it is exhausted, then elements" + + " from the next\niterable, until all of the iterables are exhausted."); + + /** + * Creates an iterator that iterates over a <i>chain</i> of iterables. + * + * @param iterables + * @return + */ + public static PyIterator chain(final PyObject[] iterables) { + final PyObject[] iterators = new PyObject[iterables.length]; + for (int i = 0; i < iterables.length; i++) { + iterators[i] = iterables[i].__iter__(); + } + + return new ItertoolsIterator() { + int iteratorIndex = 0; + + public PyObject __iternext__() { + if (iteratorIndex >= iterators.length) { + return null; + } + PyObject obj = nextElement(iterators[iteratorIndex]); + + if (obj == null) { + // increase the iteratorIndex and see if we have more + // iterators to work with + iteratorIndex++; + if (iteratorIndex < iterators.length) { + obj = nextElement(iterators[iteratorIndex]); + } + } + return obj; + } + + }; + } + + public static PyString __doc__repeat = new PyString( + "'repeat(element [,times]) -> create an iterator which returns the element\n" + + "for the specified number of times. 
If not specified, returns the element\nendlessly."); + + /** + * Creates an iterator that returns the same object the number of times + * given by <code>times</code>. + * + * @param object + * @param times + * @return + */ + public static PyIterator repeat(final PyObject object, final int times) { + return new PyIterator() { + int counter = 0; + + public PyObject __iternext__() { + if (counter < times) { + counter++; + return object; + } + return null; + } + }; + } + + /** + * Creates an iterator that returns the same object over and over again. + * + * @param object + * @return + */ + public static PyIterator repeat(final PyObject object) { + return new PyIterator() { + public PyObject __iternext__() { + return object; + } + }; + } + + public static PyString __doc__imap = new PyString( + "'map(func, *iterables) --> imap object\n\nMake an iterator that computes the " + + "function using arguments from\neach of the iterables.\tLike map() except that it returns\n" + + "an iterator instead of a list and that it stops when the shortest\niterable is exhausted " + + "instead of filling in None for shorter\niterables."); + + /** + * Works as <code>__builtin__.map()</code> but returns an iterator instead + * of a list. (Code in this method is based on __builtin__.map()). 
+ * + * @param argstar + * @return + */ + public static PyIterator imap(PyObject[] argstar) { + final int n = argstar.length - 1; + if (n < 1) { + throw Py.TypeError("imap requires at least two arguments"); + } + + final PyObject callable = argstar[0]; + final PyObject[] iters = new PyObject[n]; + + for (int j = 0; j < n; j++) { + iters[j] = Py.iter(argstar[j + 1], "argument " + (j + 1) + + " to imap() must support iteration"); + } + + return new PyIterator() { + PyObject[] args = new PyObject[n]; + + PyObject element = null; + + public PyObject __iternext__() { + + for (int i = 0; i < n; i++) { + if ((element = iters[i].__iternext__()) != null) { + // collect the arguments for the callable + args[i] = element; + } else { + // break iteration + return null; + } + } + if (callable == Py.None) { + // if None is supplied as callable we just return what's in + // the iterable(s) + if (n == 1) { + return args[0]; + } else { + return new PyTuple((PyObject[]) args.clone()); + } + } else { + return callable.__call__(args); + } + } + }; + } + + public static PyString __doc__islice = new PyString( + "islice(iterable, [start,] stop [, step]) --> islice object\n" + + "\nReturn an iterator whose next() method returns selected values from an\n" + + "iterable. If start is specified, will skip all preceding elements;\notherwise, start defaults to zero." + + "Step defaults to one. If\nspecified as another value, step determines how manyvalues are \n" + + "skipped between successive calls. Works like a slice() on a list\nbut returns an iterator."); + + /** + * Creates an iterator that returns selected values from an iterable. 
+ * + * @param iterable + * @param startObj + * the index of where in the iterable to start returning values + * @param stopObj + * the index of where in the iterable to stop returning values + * @param stepObj + * the number of steps to take between each call to + * <code>next()</code> + * @return + */ + public static PyIterator islice(final PyObject iterable, PyObject startObj, + PyObject stopObj, PyObject stepObj) { + final int stop; + final int start; + final int step; + // flag for indicating that None was supplied for stop + final boolean stopNone; + + // convert input parameters + if (stopObj instanceof PyNone) { + stopNone = true; + stop = 0; + } else { + stopNone = false; + stop = Py.py2int(stopObj, + "Stop argument must be a non-negative integer or None"); + } + + start = Py.py2int(startObj, "Start argument must be a non-negative integer"); + step = Py.py2int(stepObj, "Step argument must be a non-negative integer"); + + // validate input parameters + if (start < 0 || step < 0 || stop < 0) { + throw Py.ValueError("Indices for islice() must be non-negative integers"); + } + + if (step == 0) { + throw Py.ValueError("Step must be one or larger for islice()"); + } + + return new ItertoolsIterator() { + int counter = start; + + int lastCount = 0; + + PyObject iter = iterable.__iter__(); + + public PyObject __iternext__() { + PyObject result = null; + + if (counter >= stop && !stopNone) { + return null; + } + + while (lastCount <= counter) { + result = nextElement(iter); + lastCount++; + } + counter += step; + return result; + } + + }; + + } + + /** + * @see islice(PyObject iterable, PyObject startObj, PyObject + * stopObj, PyObject stepObj) startObj defaults to 0 and stepObj to 1 + * @param iterable + * @param stopObj + * @return + */ + public static PyIterator islice(PyObject iterable, PyObject stopObj) { + return islice(iterable, new PyInteger(0), stopObj, new PyInteger(1)); + } + + /** + * @see islice(PyObject iterable, PyObject startObj, 
PyObject + * stopObj, PyObject stepObj) stepObj defaults to 1 + * @param iterable + * @param start + * @param stopObj + * @return + */ + public static PyIterator islice(PyObject iterable, PyObject start, + PyObject stopObj) { + return islice(iterable, start, stopObj, new PyInteger(1)); + } + + /** + * Iterator base class for iterators returned by <code>ifilter</code> and + * <code>ifilterfalse</code>. + */ + static class FilterIterator extends ItertoolsIterator { + private PyObject predicate; + + private PyObject iterator; + + private boolean filterTrue; + + FilterIterator(PyObject predicate, PyObject iterable, boolean filterTrue) { + if (predicate instanceof PyNone) { + this.predicate = null; + } else { + this.predicate = predicate; + } + this.iterator = iterable.__iter__(); + this.filterTrue = filterTrue; + } + + public PyObject __iternext__() { + + while (true) { + PyObject element = nextElement(iterator); + if (element != null) { + // the boolean value of calling predicate with the element + // or if predicate is null/None of the element itself + boolean booleanValue = predicate != null ? predicate + .__call__(element).__nonzero__() : element + .__nonzero__(); + if (booleanValue == filterTrue) { + // if the boolean value is the same as filterTrue return + // the element + // for ifilter filterTrue is always true, for + // ifilterfalse always false + return element; + } + } else { + return null; + } + } + } + } + + public static PyString __doc__ifilter = new PyString( + "ifilter(function or None, sequence) --> ifilter object\n\n" + + "Return those items of sequence for which function(item) is true.\nIf function is None, " + + "return the items that are true."); + + /** + * Creates an iterator that returns the items of the iterable for which + * <code>predicate(item)</code> is <code>true</code>. If + * <code>predicate</code> is null (None) return the items that are true. 
+ * + * @param predicate + * @param iterable + * @return + */ + public static PyIterator ifilter(PyObject predicate, PyObject iterable) { + return new FilterIterator(predicate, iterable, true); + } + + public static PyString __doc__ifilterfalse = new PyString( + "'ifilterfalse(function or None, sequence) --> ifilterfalse object\n\n" + + "Return those items of sequence for which function(item) is false.\nIf function is None, " + + "return the items that are false.'"); + + /** + * Creates an iterator that returns the items of the iterable for which + * <code>predicate(item)</code> is <code>false</code>. If + * <code>predicate</code> is null (None) return the items that are false. + * + * @param predicate + * @param iterable + * @return + */ + public static PyIterator ifilterfalse(PyObject predicate, PyObject iterable) { + return new FilterIterator(predicate, iterable, false); + } + + public static PyString __doc__izip = new PyString( + "izip(iter1 [,iter2 [...]]) --> izip object\n\nReturn an izip object " + + "whose .next() method returns a tuple where\nthe i-th element comes from the i-th iterable argument. " + + "The .next()\nmethod continues until the shortest iterable in the argument sequence\nis exhausted and then it " + + "raises StopIteration. Works like the zip()\nfunction but consumes less memory by returning an iterator " + + "instead of\na list."); + + /** + * Create an iterator whose <code>next()</code> method returns a tuple + * where the i-th element comes from the i-th iterable argument. Continues + * until the shortest iterable is exhausted. (Code in this method is based + * on __builtin__.zip()). + * + * @param argstar + * @return + */ + public static PyIterator izip(PyObject[] argstar) { + final int itemsize = argstar.length; + + // Type check the arguments; they must be sequences. 
+ final PyObject[] iters = new PyObject[itemsize]; + + for (int i = 0; i < itemsize; i++) { + PyObject iter = argstar[i].__iter__(); + if (iter == null) { + throw Py.TypeError("izip argument #" + (i + 1) + + " must support iteration"); + } + iters[i] = iter; + } + + return new ItertoolsIterator() { + + public PyObject __iternext__() { + if (itemsize == 0) + return null; + + PyObject[] next = new PyObject[itemsize]; + PyObject item; + + for (int i = 0; i < itemsize; i++) { + + item = nextElement(iters[i]); + + if (item == null) { + return null; + } + next[i] = item; + } + return new PyTuple(next); + } + + }; + + } + + public static PyString __doc__starmap = new PyString( + "starmap(function, sequence) --> starmap object\n\nReturn an " + + "iterator whose values are returned from the function evaluated\nwith an argument tuple taken from the " + + "given sequence."); + + /** + * Create an iterator whose <code>next()</code> method returns the result + * of calling the function (first argument) with a tuple of arguments + * returned from the iterable (second argument). 
+ * + * @param starargs + * [0] = callable function, [1] = iterable with argument tuples + * @return + */ + public static PyIterator starmap(PyObject[] starargs) { + if (starargs.length != 2) { + throw Py.TypeError("starmap requires 2 arguments, got " + + starargs.length); + } + final PyObject callable = starargs[0]; + final PyObject iterator = starargs[1].__iter__(); + + return new ItertoolsIterator() { + + public PyObject __iternext__() { + PyObject args = nextElement(iterator); + PyObject result = null; + + if (args != null) { + if (!args.getClass().isAssignableFrom(PyTuple.class)) { + throw Py.TypeError("iterator must return a tuple"); + } + PyTuple argTuple = (PyTuple) args; + // convert to array of PyObjects in call to function + result = callable.__call__(argTuple.getArray()); + } + return result; + } + + }; + } + + /** + * Iterator base class used by <code>dropwhile()</code> and <code>takewhile</code>. + */ + static class WhileIterator extends ItertoolsIterator { + private PyObject iterator; + + private PyObject predicate; + + // flag that indicates if the iterator should drop or return arguments "while" the predicate is true + private boolean drop; + + // flag that is set once the predicate is satisfied + private boolean predicateSatisfied; + + WhileIterator(PyObject predicate, PyObject iterable, boolean drop) { + this.predicate = predicate; + iterator = iterable.__iter__(); + this.drop = drop; + } + + public PyObject __iternext__() { + + while (true) { + PyObject element = nextElement(iterator); + if (element != null) { + if (!predicateSatisfied) { + // the predicate is not satisfied yet (or still satisfied in the case of drop being + // false), so we need to check it + if (predicate.__call__(element).__nonzero__() != drop) { + predicateSatisfied = drop; + return element; + } + predicateSatisfied = !drop; + } else { + if (drop) { + return element; + } else { + // end iteration if predicate is false and drop is false + return null; + } + } + } else { + // 
end iteration + return null; + } + + } + } + } + + public static PyString __doc__dropwhile = new PyString( + "dropwhile(predicate, iterable) --> dropwhile object\n\nDrop items " + + "from the iterable while predicate(item) is true.\nAfterwards, return every element until theiterable is exhausted."); + + /** + * Create an iterator that drops items from the iterable while <code>predicate(item)</code> equals true. After which + * every remaining item of the iterable is returned. + * @param predicate + * @param iterable + * @return + */ + public static PyIterator dropwhile(PyObject predicate, PyObject iterable) { + return new WhileIterator(predicate, iterable, true); + } + + public static PyString __doc__takewhile = new PyString( + "takewhile(predicate, iterable) --> takewhile object\n\nReturn " + + "successive entries from an iterable as long as the \npredicate evaluates to true for each entry."); + + /** + * Create an iterator that returns items from the iterable while <code>predicate(item)</code> is true. After which + * iteration is stopped. + * @param predicate + * @param iterable + * @return + */ + public static PyIterator takewhile(PyObject predicate, PyObject iterable) { + return new WhileIterator(predicate, iterable, false); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |