[Mp4h-devel]CVS: mp4h/pcre dftables.c,1.1,1.2 internal.h,1.2,1.3 maketables.c,1.1,1.2 pcre.c,1.2,1.3
Brought to you by:
barbier
|
From: Denis B. <ba...@us...> - 2001-11-08 22:55:51
|
Update of /cvsroot/mp4h/mp4h/pcre
In directory usw-pr-cvs1:/tmp/cvs-serv22896/pcre
Modified Files:
dftables.c internal.h maketables.c pcre.c pcre.h study.c
Log Message:
*) Upgrade to PCRE 3.7 (08-Nov-2001):
Upstream does allow various definitions of the newline char; I am
sticking with `\n' in internal.h since I know exactly how to
incorporate this option.
[Denis Barbier]
Index: dftables.c
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/dftables.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- dftables.c 2000/07/19 21:43:52 1.1
+++ dftables.c 2001/11/08 22:55:47 1.2
@@ -8,7 +8,7 @@
Written by: Philip Hazel <ph...@ca...>
- Copyright (c) 1997-2000 University of Cambridge
+ Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -53,7 +53,7 @@
int main(void)
{
int i;
-unsigned const char *tables = pcre_maketables();
+const unsigned char *tables = pcre_maketables();
printf(
"/*************************************************\n"
Index: internal.h
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/internal.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- internal.h 2001/08/12 18:52:42 1.2
+++ internal.h 2001/11/08 22:55:47 1.3
@@ -9,7 +9,7 @@
Written by: Philip Hazel <ph...@ca...>
- Copyright (c) 1997-2000 University of Cambridge
+ Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -123,12 +123,40 @@
#define FALSE 0
#define TRUE 1
+/* Escape items that are just an encoding of a particular data value. Note that
+ESC_N is defined as yet another macro, which is set in config.h to either \n
+(the default) or \r (which some people want). */
+
+#ifndef ESC_E
+#define ESC_E 27
+#endif
+
+#ifndef ESC_F
+#define ESC_F '\f'
+#endif
+
+#ifndef NEWLINE
+#define NEWLINE '\n'
+#endif
+
+#ifndef ESC_N
+#define ESC_N NEWLINE
+#endif
+
+#ifndef ESC_R
+#define ESC_R '\r'
+#endif
+
+#ifndef ESC_T
+#define ESC_T '\t'
+#endif
+
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns
their negation. Also, they must appear in the same order as in the opcode
definitions below, up to ESC_z. The final one must be ESC_REF as subsequent
values are used for \1, \2, \3, etc. There is a test in the code for an escape
-greater than ESC_b and less than ESC_X to detect the types that may be
+greater than ESC_b and less than ESC_Z to detect the types that may be
repeated. If any new escapes are put in-between that don't consume a character,
that code will have to change. */
@@ -224,19 +252,26 @@
OP_ONCE, /* Once matched, don't back up into the subpattern */
OP_COND, /* Conditional group */
- OP_CREF, /* Used to hold an extraction string number */
+ OP_CREF, /* Used to hold an extraction string number (cond ref) */
OP_BRAZERO, /* These two must remain together and in this */
OP_BRAMINZERO, /* order. */
+ OP_BRANUMBER, /* Used for extracting brackets whose number is greater
+ than can fit into an opcode. */
+
OP_BRA /* This and greater values are used for brackets that
- extract substrings. */
+ extract substrings up to a basic limit. After that,
+ use is made of OP_BRANUMBER. */
};
-/* The highest extraction number. This is limited by the number of opcodes
-left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */
+/* The highest extraction number before we have to start using additional
+bytes. (Originally PCRE didn't have support for extraction counts higher than
+this number.) The value is limited by the number of opcodes left after OP_BRA,
+i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
+opcodes. */
-#define EXTRACT_MAX 99
+#define EXTRACT_BASIC_MAX 150
/* The texts of compile-time error messages are defined as macros here so that
they can be accessed by the POSIX wrapper and converted into error codes. Yes,
@@ -255,13 +290,13 @@
#define ERR10 "operand of unlimited repeat could match the empty string"
#define ERR11 "internal error: unexpected repeat"
#define ERR12 "unrecognized character after (?"
-#define ERR13 "too many capturing parenthesized sub-patterns"
+#define ERR13 "unused error"
#define ERR14 "missing )"
#define ERR15 "back reference to non-existent subpattern"
#define ERR16 "erroffset passed as NULL"
#define ERR17 "unknown option bit(s) set"
#define ERR18 "missing ) after comment"
-#define ERR19 "too many sets of parentheses"
+#define ERR19 "parentheses nested too deeply"
#define ERR20 "regular expression too large"
#define ERR21 "failed to get memory"
#define ERR22 "unmatched parentheses"
@@ -296,8 +331,8 @@
size_t size;
const unsigned char *tables;
unsigned long int options;
- uschar top_bracket;
- uschar top_backref;
+ unsigned short int top_bracket;
+ unsigned short int top_backref;
uschar first_char;
uschar req_char;
uschar code[1];
Index: maketables.c
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/maketables.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- maketables.c 2000/07/19 21:43:52 1.1
+++ maketables.c 2001/11/08 22:55:47 1.2
@@ -8,7 +8,7 @@
Written by: Philip Hazel <ph...@ca...>
- Copyright (c) 1997-2000 University of Cambridge
+ Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -58,7 +58,7 @@
Returns: pointer to the contiguous block of data
*/
-unsigned const char *
+const unsigned char *
pcre_maketables(void)
{
unsigned char *yield, *p;
Index: pcre.c
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/pcre.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- pcre.c 2001/08/12 18:52:42 1.2
+++ pcre.c 2001/11/08 22:55:47 1.3
@@ -9,7 +9,7 @@
Written by: Philip Hazel <ph...@ca...>
- Copyright (c) 1997-2000 University of Cambridge
+ Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -60,8 +60,11 @@
#endif
-/* Number of items on the nested bracket stacks at compile time. This should
-not be set greater than 200. */
+/* Maximum number of items on the nested bracket stacks at compile time. This
+applies to the nesting of all kinds of parentheses. It does not limit
+un-nested, non-capturing parentheses. This number can be made bigger if
+necessary - it is used to dimension one int and one unsigned char vector at
+compile time. */
#define BRASTACK_SIZE 200
@@ -95,7 +98,7 @@
"class", "Ref", "Recurse",
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
- "Brazero", "Braminzero", "Bra"
+ "Brazero", "Braminzero", "Branumber", "Bra"
};
#endif
@@ -111,9 +114,9 @@
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
- '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
- 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
- 0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
+ '`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0, /* ` - g */
+ 0, 0, 0, 0, 0, 0, ESC_N, 0, /* h - o */
+ 0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w, /* p - w */
0, 0, -ESC_z /* x - z */
};
@@ -814,10 +817,11 @@
/* Skip over things that don't match chars */
case OP_REVERSE:
+ case OP_BRANUMBER:
+ case OP_CREF:
cc++;
/* Fall through */
- case OP_CREF:
case OP_OPT:
cc++;
/* Fall through */
@@ -871,7 +875,7 @@
/* Check a class for variable quantification */
case OP_CLASS:
- cc += (*cc == OP_REF)? 2 : 33;
+ cc += 33;
switch (*cc)
{
@@ -978,7 +982,7 @@
Arguments:
options the option bits
- brackets points to number of brackets used
+ brackets points to number of extracting brackets used
code points to the pointer to the current code point
ptrptr points to the current pattern pointer
errorptr points to pointer to error message
@@ -1029,7 +1033,7 @@
int class_charcount;
int class_lastchar;
int newoptions;
- int condref;
+ int skipbytes;
int subreqchar;
c = *ptr;
@@ -1040,7 +1044,7 @@
{
/* The space before the ; is to avoid a warning on a silly compiler
on the Macintosh. */
- while ((c = *(++ptr)) != 0 && c != '\n') ;
+ while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
continue;
}
}
@@ -1578,7 +1582,7 @@
OP_BRAZERO in front of it, and because the group appears once in the
data, whereas in other cases it appears the minimum number of times. For
this reason, it is simplest to treat this case separately, as otherwise
- the code gets far too mess. There are several special subcases when the
+ the code gets far too messy. There are several special subcases when the
minimum is zero. */
if (repeat_min == 0)
@@ -1729,7 +1733,7 @@
case '(':
newoptions = options;
- condref = -1;
+ skipbytes = 0;
if (*(++ptr) == '?')
{
@@ -1752,7 +1756,7 @@
bravalue = OP_COND; /* Conditional group */
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
{
- condref = *ptr - '0';
+ int condref = *ptr - '0';
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
if (condref == 0)
{
@@ -1760,6 +1764,10 @@
goto FAILED;
}
ptr++;
+ code[3] = OP_CREF;
+ code[4] = condref >> 8;
+ code[5] = condref & 255;
+ skipbytes = 3;
}
else ptr--;
break;
@@ -1862,16 +1870,21 @@
}
}
- /* Else we have a referencing group; adjust the opcode. */
+ /* Else we have a referencing group; adjust the opcode. If the bracket
+ number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and
+ arrange for the true number to follow later, in an OP_BRANUMBER item. */
else
{
- if (++(*brackets) > EXTRACT_MAX)
+ if (++(*brackets) > EXTRACT_BASIC_MAX)
{
- *errorptr = ERR13;
- goto FAILED;
+ bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
+ code[3] = OP_BRANUMBER;
+ code[4] = *brackets >> 8;
+ code[5] = *brackets & 255;
+ skipbytes = 3;
}
- bravalue = OP_BRA + *brackets;
+ else bravalue = OP_BRA + *brackets;
}
/* Process nested bracketed re. Assertions may not be repeated, but other
@@ -1887,13 +1900,13 @@
options | PCRE_INGROUP, /* Set for all nested groups */
((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
- brackets, /* Bracket level */
+ brackets, /* Extracting bracket count */
&tempcode, /* Where to put code (updated) */
&ptr, /* Input pointer (updated) */
errorptr, /* Where to put an error message */
(bravalue == OP_ASSERTBACK ||
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
- condref, /* Condition reference number */
+ skipbytes, /* Skip over OP_COND/OP_BRANUMBER */
&subreqchar, /* For possible last char */
&subcountlits, /* For literal count */
cd)) /* Tables block */
@@ -1907,7 +1920,7 @@
/* If this is a conditional bracket, check that there are no more than
two branches in the group. */
- if (bravalue == OP_COND)
+ else if (bravalue == OP_COND)
{
uschar *tc = code;
condcount = 0;
@@ -1974,9 +1987,11 @@
{
if (-c >= ESC_REF)
{
+ int number = -c - ESC_REF;
previous = code;
*code++ = OP_REF;
- *code++ = -c - ESC_REF;
+ *code++ = number >> 8;
+ *code++ = number & 255;
}
else
{
@@ -2011,7 +2026,7 @@
{
/* The space before the ; is to avoid a warning on a silly compiler
on the Macintosh. */
- while ((c = *(++ptr)) != 0 && c != '\n') ;
+ while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
if (c == 0) break;
continue;
}
@@ -2100,7 +2115,7 @@
ptrptr -> the address of the current pattern pointer
errorptr -> pointer to error message
lookbehind TRUE if this is a lookbehind assertion
- condref >= 0 for OPT_CREF setting at start of conditional group
+ skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER)
reqchar -> place to put the last required character, or a negative number
countlits -> place to put the shortest literal count of any branch
cd points to the data block with tables pointers
@@ -2110,7 +2125,7 @@
static BOOL
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
- const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
+ const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,
int *reqchar, int *countlits, compile_data *cd)
{
const uschar *ptr = *ptrptr;
@@ -2123,16 +2138,7 @@
*reqchar = -1;
*countlits = INT_MAX;
-code += 3;
-
-/* At the start of a reference-based conditional group, insert the reference
-number as an OP_CREF item. */
-
-if (condref >= 0)
- {
- *code++ = OP_CREF;
- *code++ = condref;
- }
+code += 3 + skipbytes;
/* Loop for each alternative branch */
@@ -2284,7 +2290,8 @@
break;
case OP_CREF:
- code += 2;
+ case OP_BRANUMBER:
+ code += 3;
break;
case OP_WORD_BOUNDARY:
@@ -2547,6 +2554,7 @@
{
int min, max;
int class_charcount;
+ int bracket_length;
if ((options & PCRE_EXTENDED) != 0)
{
@@ -2555,7 +2563,7 @@
{
/* The space before the ; is to avoid a warning on a silly compiler
on the Macintosh. */
- while ((c = *(++ptr)) != 0 && c != '\n') ;
+ while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
continue;
}
}
@@ -2581,7 +2589,7 @@
}
length++;
- /* A back reference needs an additional char, plus either one or 5
+ /* A back reference needs an additional 2 bytes, plus either one or 5
bytes for a repeat. We also need to keep the value of the highest
back reference. */
@@ -2589,7 +2597,7 @@
{
int refnum = -c - ESC_REF;
if (refnum > top_backref) top_backref = refnum;
- length++; /* For single back reference */
+ length += 2; /* For single back reference */
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
{
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
@@ -2687,6 +2695,7 @@
case '(':
branch_newextra = 0;
+ bracket_length = 3;
/* Handle special forms of bracket, which all start (? */
@@ -2754,7 +2763,7 @@
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
{
ptr += 4;
- length += 2;
+ length += 3;
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
if (*ptr != ')')
{
@@ -2881,15 +2890,19 @@
}
/* Extracting brackets must be counted so we can process escapes in a
- Perlish way. */
+ Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to
+ need an additional 3 bytes of store per extracting bracket. */
- else bracount++;
+ else
+ {
+ bracount++;
+ if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
+ }
- /* Non-special forms of bracket. Save length for computing whole length
- at end if there's a repeat that requires duplication of the group. Also
- save the current value of branch_extra, and start the new group with
- the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
- for a lookbehind assertion. */
+ /* Save length for computing whole length at end if there's a repeat that
+ requires duplication of the group. Also save the current value of
+ branch_extra, and start the new group with the new value. If non-zero, this
+ will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
if (brastackptr >= sizeof(brastack)/sizeof(int))
{
@@ -2901,7 +2914,7 @@
branch_extra = branch_newextra;
brastack[brastackptr++] = length;
- length += 3;
+ length += bracket_length;
continue;
/* Handle ket. Look for subsequent max/min; for certain sets of values we
@@ -2981,7 +2994,7 @@
{
/* The space before the ; is to avoid a warning on a silly compiler
on the Macintosh. */
- while ((c = *(++ptr)) != 0 && c != '\n') ;
+ while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
continue;
}
}
@@ -3062,7 +3075,7 @@
code = re->code;
*code = OP_BRA;
bracount = 0;
-(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
+(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0,
&reqchar, &countlits, &compile_block);
re->top_bracket = bracount;
re->top_backref = top_backref;
@@ -3176,7 +3189,10 @@
if (*code >= OP_BRA)
{
- printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
+ if (*code - OP_BRA > EXTRACT_BASIC_MAX)
+ printf("%3d Bra extra", (code[1] << 8) + code[2]);
+ else
+ printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
code += 2;
}
@@ -3187,16 +3203,6 @@
code++;
break;
- case OP_COND:
- printf("%3d Cond", (code[1] << 8) + code[2]);
- code += 2;
- break;
-
- case OP_CREF:
- printf(" %.2d %s", code[1], OP_names[*code]);
- code++;
- break;
-
case OP_CHARS:
charlength = *(++code);
printf("%3d ", charlength);
@@ -3213,11 +3219,10 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
- code += 2;
- break;
-
case OP_REVERSE:
+ case OP_BRANUMBER:
+ case OP_COND:
+ case OP_CREF:
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
code += 2;
break;
@@ -3290,8 +3295,8 @@
break;
case OP_REF:
- printf(" \\%d", *(++code));
- code ++;
+ printf(" \\%d", (code[1] << 8) | code[2]);
+ code += 3;
goto CLASS_REF_REPEAT;
case OP_CLASS:
@@ -3504,8 +3509,14 @@
if (op > OP_BRA)
{
+ int offset;
int number = op - OP_BRA;
- int offset = number << 1;
+
+ /* For extended extraction brackets (large number), we have to fish out the
+ number from a dummy opcode at the start. */
+
+ if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5];
+ offset = number << 1;
#ifdef DEBUG
printf("start bracket %d subject=", number);
@@ -3535,6 +3546,7 @@
md->offset_vector[offset] = save_offset1;
md->offset_vector[offset+1] = save_offset2;
md->offset_vector[md->offset_end - number] = save_offset3;
+
return FALSE;
}
@@ -3567,10 +3579,10 @@
case OP_COND:
if (ecode[3] == OP_CREF) /* Condition is extraction test */
{
- int offset = ecode[4] << 1; /* Doubled reference number */
+ int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */
return match(eptr,
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
- 5 : 3 + (ecode[1] << 8) + ecode[2]),
+ 6 : 3 + (ecode[1] << 8) + ecode[2]),
offset_top, md, ims, eptrb, match_isgroup);
}
@@ -3590,10 +3602,12 @@
}
/* Control never reaches here */
- /* Skip over conditional reference data if encountered (should not be) */
+ /* Skip over conditional reference or large extraction number data if
+ encountered. */
case OP_CREF:
- ecode += 2;
+ case OP_BRANUMBER:
+ ecode += 3;
break;
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
@@ -3859,9 +3873,15 @@
if (*prev != OP_COND)
{
+ int offset;
int number = *prev - OP_BRA;
- int offset = number << 1;
+ /* For extended extraction brackets (large number), we have to fish out
+ the number from a dummy opcode at the start. */
+
+ if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5];
+ offset = number << 1;
+
#ifdef DEBUG
printf("end bracket %d", number);
printf("\n");
@@ -3920,7 +3940,7 @@
if (md->notbol && eptr == md->start_subject) return FALSE;
if ((ims & PCRE_MULTILINE) != 0)
{
- if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
+ if (eptr != md->start_subject && eptr[-1] != NEWLINE) return FALSE;
ecode++;
break;
}
@@ -3939,7 +3959,7 @@
case OP_DOLL:
if ((ims & PCRE_MULTILINE) != 0)
{
- if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
+ if (eptr < md->end_subject) { if (*eptr != NEWLINE) return FALSE; }
else { if (md->noteol) return FALSE; }
ecode++;
break;
@@ -3950,7 +3970,7 @@
if (!md->endonly)
{
if (eptr < md->end_subject - 1 ||
- (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
+ (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
ecode++;
break;
@@ -3969,7 +3989,7 @@
case OP_EODN:
if (eptr < md->end_subject - 1 ||
- (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
+ (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
ecode++;
break;
@@ -3991,7 +4011,7 @@
/* Match a single character type; inline for speed */
case OP_ANY:
- if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
+ if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
return FALSE;
if (eptr++ >= md->end_subject) return FALSE;
#ifdef SUPPORT_UTF8
@@ -4054,8 +4074,8 @@
case OP_REF:
{
int length;
- int offset = ecode[1] << 1; /* Doubled reference number */
- ecode += 2; /* Advance past the item */
+ int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */
+ ecode += 3; /* Advance past item */
/* If the reference is unset, set the length to be longer than the amount
of subject left; this ensures that every attempt at a match fails. We
@@ -4599,7 +4619,7 @@
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject ||
- (*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0))
+ (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
return FALSE;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4608,7 +4628,7 @@
#endif
/* Non-UTF8 can be faster */
if ((ims & PCRE_DOTALL) == 0)
- { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
+ { for (i = 1; i <= min; i++) if (*eptr++ == NEWLINE) return FALSE; }
else eptr += min;
break;
@@ -4663,7 +4683,7 @@
switch(ctype)
{
case OP_ANY:
- if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
+ if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return FALSE;
#ifdef SUPPORT_UTF8
if (md->utf8)
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
@@ -4718,7 +4738,7 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || *eptr++ == '\n') break;
+ if (eptr >= md->end_subject || *eptr++ == NEWLINE) break;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
}
@@ -4738,7 +4758,7 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || *eptr == '\n') break;
+ if (eptr >= md->end_subject || *eptr == NEWLINE) break;
eptr++;
}
}
@@ -4879,8 +4899,8 @@
const real_pcre *re = (const real_pcre *)external_re;
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
BOOL using_temporary_offsets = FALSE;
-BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
-BOOL startline = (re->options & PCRE_STARTLINE) != 0;
+BOOL anchored;
+BOOL startline;
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
@@ -4888,6 +4908,9 @@
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+startline = (re->options & PCRE_STARTLINE) != 0;
+
match_block.start_pattern = re->code;
match_block.start_subject = (const uschar *)subject;
match_block.end_subject = match_block.start_subject + length;
@@ -5016,7 +5039,7 @@
{
if (start_match > match_block.start_subject + start_offset)
{
- while (start_match < end_subject && start_match[-1] != '\n')
+ while (start_match < end_subject && start_match[-1] != NEWLINE)
start_match++;
}
}
@@ -5121,7 +5144,7 @@
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
- if (match_block.offset_end < 2) rc = 0; else
+ if (offsetcount < 2) rc = 0; else
{
offsets[0] = start_match - match_block.start_subject;
offsets[1] = match_block.end_match_ptr - match_block.start_subject;
Index: pcre.h
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/pcre.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- pcre.h 2001/08/12 18:52:42 1.2
+++ pcre.h 2001/11/08 22:55:47 1.3
@@ -2,7 +2,7 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* Copyright (c) 1997-2000 University of Cambridge */
+/* Copyright (c) 1997-2001 University of Cambridge */
#ifndef _PCRE_H
#define _PCRE_H
@@ -10,9 +10,9 @@
/* The file pcre.h is build by "configure". Do not edit it; instead
make changes to pcre.in. */
-#define PCRE_MAJOR 3
-#define PCRE_MINOR 4
-#define PCRE_DATE 22-Aug-2000
+#define PCRE_MAJOR 3
+#define PCRE_MINOR 7
+#define PCRE_DATE 29-Oct-2001
/* Win32 uses DLL by default */
@@ -74,9 +74,12 @@
/* Types */
-typedef void pcre;
-typedef void pcre_extra;
+struct real_pcre; /* declaration; the definition is private */
+struct real_pcre_extra; /* declaration; the definition is private */
+typedef struct real_pcre pcre;
+typedef struct real_pcre_extra pcre_extra;
+
/* Store get and free functions. These can be set to alternative malloc/free
functions if required. Some magic is required for Win32 DLL; it is null on
other OS. */
@@ -99,7 +102,7 @@
extern int pcre_get_substring_list(const char *, int *, int, const char ***);
extern int pcre_info(const pcre *, int *, int *);
extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *);
-extern unsigned const char *pcre_maketables(void);
+extern const unsigned char *pcre_maketables(void);
extern pcre_extra *pcre_study(const pcre *, int, const char **);
extern const char *pcre_version(void);
Index: study.c
===================================================================
RCS file: /cvsroot/mp4h/mp4h/pcre/study.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- study.c 2000/07/19 21:43:52 1.1
+++ study.c 2001/11/08 22:55:47 1.2
@@ -9,7 +9,7 @@
Written by: Philip Hazel <ph...@ca...>
- Copyright (c) 1997-2000 University of Cambridge
+ Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -104,8 +104,6 @@
while (try_next)
{
- try_next = FALSE;
-
/* If a branch starts with a bracket or a positive lookahead assertion,
recurse to set bits from within them. That's all for this branch. */
@@ -113,6 +111,7 @@
{
if (!set_start_bits(tcode, start_bits, caseless, cd))
return FALSE;
+ try_next = FALSE;
}
else switch(*tcode)
@@ -120,12 +119,17 @@
default:
return FALSE;
+ /* Skip over extended extraction bracket number */
+
+ case OP_BRANUMBER:
+ tcode += 3;
+ break;
+
/* Skip over lookbehind and negative lookahead assertions */
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
- try_next = TRUE;
do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
tcode += 3;
break;
@@ -135,7 +139,6 @@
case OP_OPT:
caseless = (tcode[1] & PCRE_CASELESS) != 0;
tcode += 2;
- try_next = TRUE;
break;
/* BRAZERO does the bracket, but carries on. */
@@ -147,7 +150,6 @@
dummy = 1;
do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
tcode += 3;
- try_next = TRUE;
break;
/* Single-char * or ? sets the bit and tries the next item */
@@ -158,7 +160,6 @@
case OP_MINQUERY:
set_bit(start_bits, tcode[1], caseless, cd);
tcode += 2;
- try_next = TRUE;
break;
/* Single-char upto sets the bit and tries the next */
@@ -167,7 +168,6 @@
case OP_MINUPTO:
set_bit(start_bits, tcode[3], caseless, cd);
tcode += 4;
- try_next = TRUE;
break;
/* At least one single char sets the bit and stops */
@@ -181,6 +181,7 @@
case OP_PLUS:
case OP_MINPLUS:
set_bit(start_bits, tcode[1], caseless, cd);
+ try_next = FALSE;
break;
/* Single character type sets the bits and stops */
@@ -188,31 +189,37 @@
case OP_NOT_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_digit];
+ try_next = FALSE;
break;
case OP_DIGIT:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_digit];
+ try_next = FALSE;
break;
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_space];
+ try_next = FALSE;
break;
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_space];
+ try_next = FALSE;
break;
case OP_NOT_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_word];
+ try_next = FALSE;
break;
case OP_WORDCHAR:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_word];
+ try_next = FALSE;
break;
/* One or more character type fudges the pointer and restarts, knowing
@@ -221,12 +228,10 @@
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
tcode++;
- try_next = TRUE;
break;
case OP_TYPEEXACT:
tcode += 3;
- try_next = TRUE;
break;
/* Zero or more repeats of character types set the bits and then
@@ -274,7 +279,6 @@
}
tcode += 2;
- try_next = TRUE;
break;
/* Character class: set the bits and either carry on or not,
@@ -292,16 +296,16 @@
case OP_CRQUERY:
case OP_CRMINQUERY:
tcode++;
- try_next = TRUE;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
- if (((tcode[1] << 8) + tcode[2]) == 0)
- {
- tcode += 5;
- try_next = TRUE;
- }
+ if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
+ else try_next = FALSE;
+ break;
+
+ default:
+ try_next = FALSE;
break;
}
}
|