[q-lang-cvs] q/modules/clib clib.c, 1.84, 1.85 clib.q, 1.35, 1.36 system.c, 1.4, 1.5 system.q, 1.4,
Brought to you by:
agraef
From: Albert G. <ag...@us...> - 2008-01-23 05:24:36
|
Update of /cvsroot/q-lang/q/modules/clib In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv21680 Modified Files: clib.c clib.q system.c system.q Log Message: move glob and regex functions back into clib Index: system.q =================================================================== RCS file: /cvsroot/q-lang/q/modules/clib/system.q,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** system.q 16 Dec 2007 20:23:21 -0000 1.4 --- system.q 23 Jan 2008 05:24:32 -0000 1.5 *************** *** 921,981 **** public extern ngettext MSGID1 MSGID2 N, dngettext DOMAIN MSGID1 MSGID2 N, dcngettext DOMAIN MSGID1 MSGID2 N CATEGORY; - - /****************************************************************************/ - - /* Filename globbing using the shell's wildcard syntax (*, ? etc.). */ - - public extern fnmatch PATTERN S; // check whether S matches PATTERN - public extern glob PATTERN; // return the list of all filenames - // matching PATTERN - - /****************************************************************************/ - - /* Regular expression matching using "extended" (egrep-like) syntax as defined - by POSIX 1003.2/D11.2. */ - - /* 1. Low-level interface. The following functions are directly implemented in - C using the POSIX regex functions. The regmatch function searches for the - first match, regnext for the next, and regdone terminates a global search - still in progress. The OPTS string allows you to specify various options - for the search. In particular, "g" denotes a global, "i" a - case-insensitive, and "n" a "multi-line" search; see the documentation for - further details. */ - - public extern regmatch OPTS REGEX S, regnext, regdone; - - /* 2. High-level interface. The regex function evaluates, for each match of - the given regular expression in the given string, the special EXPR - argument, and returns the collection of all results as a list. The OPTS - argument has the same meaning as with the low-level functions. 
In - particular, if the "g" option is omitted, then only the first match will be - reported, if any. */ - - public special regex ~OPTS ~REGEX ~S EXPR; - - private special regex_next ~Xs EXPR, check ~P X Y; - - regex OPTS:String REGEX:String S:String EXPR - = check (regmatch OPTS REGEX S) - (reverse (regex_next [EXPR] EXPR)) []; - - regex_next Xs EXPR = check regnext - (regex_next [EXPR|Xs] EXPR) Xs; - - check P:Bool X Y = X if P; - = Y otherwise; - check P X Y = P otherwise; - - /* 3. Match state information. These functions are typically invoked after - regmatch, regnext, or in the EXPR argument of regex, to return information - about the current match. The match state is maintained on a hidden stack - manipulated with the regmatch/regnext/regdone functions, hence multiple - nested searches are possible. */ - - public extern regstart, regskip, reg N, regpos N, regend N, regs; - - /* An expression of the form `regerr MSG', where MSG is the error message, is - used to return abnormal error conditions such as bad regular expression - syntax. You can redefine `regerr' as appropriate for your application. */ - - public regerr MSG; --- 921,922 ---- Index: clib.q =================================================================== RCS file: /cvsroot/q-lang/q/modules/clib/clib.q,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** clib.q 22 Jan 2008 12:00:46 -0000 1.35 --- clib.q 23 Jan 2008 05:24:32 -0000 1.36 *************** *** 443,444 **** --- 443,503 ---- #SEM:Semaphore = get_size SEM; + + /****************************************************************************/ + + /* Filename globbing using the shell's wildcard syntax (*, ? etc.). 
*/ + + public extern fnmatch PATTERN S; // check whether S matches PATTERN + public extern glob PATTERN; // return the list of all filenames + // matching PATTERN + + /****************************************************************************/ + + /* Regular expression matching using "extended" (egrep-like) syntax as defined + by POSIX 1003.2/D11.2. */ + + /* 1. Low-level interface. The following functions are directly implemented in + C using the POSIX regex functions. The regmatch function searches for the + first match, regnext for the next, and regdone terminates a global search + still in progress. The OPTS string allows you to specify various options + for the search. In particular, "g" denotes a global, "i" a + case-insensitive, and "n" a "multi-line" search; see the documentation for + further details. */ + + public extern regmatch OPTS REGEX S, regnext, regdone; + + /* 2. High-level interface. The regex function evaluates, for each match of + the given regular expression in the given string, the special EXPR + argument, and returns the collection of all results as a list. The OPTS + argument has the same meaning as with the low-level functions. In + particular, if the "g" option is omitted, then only the first match will be + reported, if any. */ + + public special regex ~OPTS ~REGEX ~S EXPR; + + private special regex_next ~Xs EXPR, check ~P X Y; + + regex OPTS:String REGEX:String S:String EXPR + = check (regmatch OPTS REGEX S) + (reverse (regex_next [EXPR] EXPR)) []; + + regex_next Xs EXPR = check regnext + (regex_next [EXPR|Xs] EXPR) Xs; + + check P:Bool X Y = X if P; + = Y otherwise; + check P X Y = P otherwise; + + /* 3. Match state information. These functions are typically invoked after + regmatch, regnext, or in the EXPR argument of regex, to return information + about the current match. The match state is maintained on a hidden stack + manipulated with the regmatch/regnext/regdone functions, hence multiple + nested searches are possible. 
*/ + + public extern regstart, regskip, reg N, regpos N, regend N, regs; + + /* An expression of the form `regerr MSG', where MSG is the error message, is + used to return abnormal error conditions such as bad regular expression + syntax. You can redefine `regerr' as appropriate for your application. */ + + public regerr MSG; Index: system.c =================================================================== RCS file: /cvsroot/q-lang/q/modules/clib/system.c,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** system.c 16 Dec 2007 20:23:21 -0000 1.4 --- system.c 23 Jan 2008 05:24:32 -0000 1.5 *************** *** 147,162 **** #endif - #ifdef HAVE_REGEX_H - #include <regex.h> - #endif - - #ifdef HAVE_GLOB_H - #include <glob.h> - #endif - - #ifdef HAVE_FNMATCH_H - #include <fnmatch.h> - #endif - #ifdef USE_READLINE #include <readline/readline.h> --- 147,150 ---- *************** *** 241,247 **** #include <iconv.h> #include <libintl.h> - #include <fnmatch.h> - #include <glob.h> - #include <regex.h> #include <readline.h> #include <history.h> --- 229,232 ---- *************** *** 250,257 **** #define HAVE_RL_COMPLETION_MATCHES 1 - #define HAVE_FNMATCH 1 - #define HAVE_GLOB 1 - #define HAVE_REGCOMP 1 - #define HAVE_STRDUP 1 #define HAVE_MEMCPY 1 --- 235,238 ---- *************** *** 6375,6863 **** #endif - /* filename globbing: *****************************************************/ - - FUNCTION(system,fnmatch,argc,argv) - { - #ifdef HAVE_FNMATCH - char *pattern, *s; - if (argc == 2 && isstr(argv[0], &pattern) && isstr(argv[1], &s)) { - int res; - pattern = utf8_to_sys(pattern); s = utf8_to_sys(s); - if (!pattern || !s) { - if (pattern) free(pattern); if (s) free(s); - return __ERROR; - } - res = fnmatch(pattern, s, 0); - free(pattern); free(s); - if (res) - return mkfalse; - else - return mktrue; - } else - #endif - return __FAIL; - } - - FUNCTION(system,glob,argc,argv) - { - #ifdef HAVE_GLOB - char *pattern; - if (argc == 1 && isstr(argv[0], 
&pattern)) { - glob_t g; - int res; - g.gl_offs = 0; - pattern = utf8_to_sys(pattern); - if (!pattern) return __ERROR; - res = glob(pattern, 0, NULL, &g); - free(pattern); - if (res == GLOB_NOMATCH) - return mknil; - else if (res) - return __FAIL; - else { - expr x = mknil; - int i = g.gl_pathc; - while (x && --i >= 0) - x = mkcons(mkstr(sys_to_utf8(g.gl_pathv[i])), x); - globfree(&g); - if (x) - return x; - else - return __ERROR; - } - } else - #endif - return __FAIL; - } - - /* regular expression matching: *******************************************/ - - #ifdef HAVE_REGCOMP - - /* regexp stack */ - - typedef struct { - unsigned done:1, global:2, matched:1; - int cflags, eflags; - regex_t rx; - regmatch_t *matches; - char *s, *p, *start; - } regstate_t; - - long regalloc = 0; - regstate_t *regstack = NULL, *regp = NULL; - char regmsg[BUFSZ]; - - #define REGALLOC 50 - - static int reg_push(void) - { - if (!regstack) - if ((regstack = malloc(REGALLOC*sizeof(regstate_t)))) { - regalloc = REGALLOC; - regp = regstack; - } else - return -1; - else if (!regp) - regp = regstack; - else if (regp-regstack+1 == regalloc) { - regstate_t *newstack = realloc(regstack, - (regalloc+REGALLOC)*sizeof(regstate_t)); - if (newstack) { - regstack = newstack; - regp = regstack+regalloc; - regalloc += REGALLOC; - } else - return -1; - } else - regp++; - regp->done = regp->global = regp->matched = 0; - regp->cflags = regp->eflags = 0; - regp->matches = NULL; - regp->s = regp->p = regp->start = NULL; - return 0; - } - - static void reg_pop(void) - { - if (!regp) return; - regfree(&regp->rx); - if (regp->matches) free(regp->matches); - if (regp->s) free(regp->s); - if (regp > regstack) - regp--; - else - regp = NULL; - } - - /* push a new expression on the stack */ - - static int reg_add(char *pattern, char *s, int global, int cflags, int eflags) - { - int ret; - if (regp && regp->done) reg_pop(); - if (reg_push()) return -1; - regp->global = global; - regp->cflags = cflags; - regp->eflags =
eflags; - ret = regcomp(&regp->rx, pattern, REG_EXTENDED|cflags); - *regmsg = 0; - if (ret) { - regerror(ret, &regp->rx, regmsg, BUFSZ); - reg_pop(); - return ret; - } - if (!(regp->s = strdup(s))) { - reg_pop(); - return -1; - } - regp->p = regp->s; regp->start = NULL; - if (!(regp->matches = malloc((regp->rx.re_nsub+1)*sizeof(regmatch_t)))) { - reg_pop(); - return -1; - } - return 0; - } - - /* search */ - - static int reg_flags(char *p) - { - int flags; - flags = regp->eflags; - if (p > regp->s) - if (regp->cflags & REG_NEWLINE) - if (p[-1] == '\n') - flags &= ~REG_NOTBOL; - else - flags |= REG_NOTBOL; - else - flags |= REG_NOTBOL; - return flags; - } - - static int reg_search(void) - { - int ret; - char *prev; - while (regp && regp->done && regp>regstack) reg_pop(); - if (!regp) return -1; - if (regp->matched) - /* note the beginning of the previous match */ - prev = regp->start+regp->matches[0].rm_so; - regp->start = regp->p; - if (regp->global || !regp->matched) { - ret = regexec(&regp->rx, regp->p, regp->rx.re_nsub+1, regp->matches, - reg_flags(regp->p)); - if (!ret) { - if (regp->matched) - if (regp->matches[0].rm_eo == regp->matches[0].rm_so && - regp->p == prev) - /* an extra empty match: if not at end of string then advance to the - next position and try again; otherwise simply ignore this match - and fail */ - if (*regp->p) { - int i; - /* this cannot fail since we can always match the empty string */ - ret = regexec(&regp->rx, regp->p+1, regp->rx.re_nsub+1, - regp->matches, reg_flags(regp->p+1)); - /* translate offsets */ - for (i = 0; i <= regp->rx.re_nsub; i++) { - regp->matches[i].rm_so++; - regp->matches[i].rm_eo++; - } - } else - ret = REG_NOMATCH; - regp->matched = 1; - } - } else - ret = REG_NOMATCH; - *regmsg = 0; - if (ret) { - regp->done = 1; - regerror(ret, &regp->rx, regmsg, BUFSZ); - } else if (regp->global == 2 && - regp->matches[0].rm_eo > regp->matches[0].rm_so) - regp->p += regp->matches[0].rm_so+1; - else - regp->p += regp->matches[0].rm_eo; -
return ret; - } - - /* stop search */ - - static void reg_done(void) - { - if (regp) { - regp->start = regp->p; - regp->done = 1; - } - } - - /* return matches */ - - static size_t reg_nmatches(void) - { - if (regp) - return regp->rx.re_nsub; - else - return 0; - } - - static long reg_start(void) - { - if (regp && regp->start) - return regp->start-regp->s; - else - return -1; - } - - static char *reg_skipstr(void) - { - if (regp && regp->start) - return regp->start; - else - return NULL; - } - - static long reg_pos(int i) - { - if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) - if (!regp->done && regp->matches[i].rm_so >= 0) - return regp->start+regp->matches[i].rm_so-regp->s; - else - return -1; - else - return -1; - } - - static long reg_end(int i) - { - if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) - if (!regp->done && regp->matches[i].rm_eo >= 0) - return regp->start+regp->matches[i].rm_eo-regp->s; - else - return -1; - else - return -1; - } - - static char *reg_str(int i) - { - if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) - if (!regp->done && regp->matches[i].rm_so >= 0) - return regp->start+regp->matches[i].rm_so; - else - return NULL; - else - return NULL; - } - - #endif - - /* interface functions */ - - FUNCTION(system,regmatch,argc,argv) - { - #ifdef HAVE_REGCOMP - char *opts, *regex, *s; - int cflags = 0, eflags = 0, global = 0, ret; - if (argc != 3 || !isstr(argv[0], &opts) || !isstr(argv[1], &regex) || - !isstr(argv[2], &s)) - return __FAIL; - while (*opts) - switch (*(opts++)) { - case 'g': - if (!global) global = 1; - break; - case 'G': - global = 2; - break; - case 'i': - cflags |= REG_ICASE; - break; - case 'n': - cflags |= REG_NEWLINE; - break; - case '^': - eflags |= REG_NOTBOL; - break; - case '$': - eflags |= REG_NOTEOL; - break; - default: - return __FAIL; - } - regex = utf8_to_sys(regex); s = utf8_to_sys(s); - if (!regex || !s) { - if (regex) free(regex); if (s) free(s); - return __ERROR; - } - ret =
reg_add(regex, s, global, cflags, eflags); - free(regex); free(s); - if (ret == -1) - return __ERROR; - else if (ret) - return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); - ret = reg_search(); - if (ret == -1 || ret == REG_NOMATCH) - return mkfalse; - else if (ret) - return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); - else - return mktrue; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regnext,argc,argv) - { - #ifdef HAVE_REGCOMP - int ret; - if (argc != 0) return __FAIL; - ret = reg_search(); - if (ret == -1 || ret == REG_NOMATCH) - return mkfalse; - else if (ret) - return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); - else - return mktrue; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regdone,argc,argv) - { - #ifdef HAVE_REGCOMP - if (argc != 0) return __FAIL; - reg_done(); - return mkvoid; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regstart,argc,argv) - { - #ifdef HAVE_REGCOMP - long start; - if (argc != 0) return __FAIL; - start = reg_start(); - if (start >= 0) - return mkint(start); - else - return __FAIL; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regskip,argc,argv) - { - #ifdef HAVE_REGCOMP - char *skip; - if (argc != 0) return __FAIL; - if ((skip = reg_skipstr())) { - long start = reg_start(), pos = reg_pos(0); - char *s, *t; - if (pos >= start) - s = malloc(pos-start+1); - else - s = malloc(strlen(skip)+1); - if (!s) return __ERROR; - if (pos >= start) { - strncpy(s, skip, pos-start); - s[pos-start] = 0; - } else - strcpy(s, skip); - t = sys_to_utf8(s); free(s); - return mkstr(t); - } else - return __FAIL; - #else - return __FAIL; - #endif - } - - FUNCTION(system,reg,argc,argv) - { - #ifdef HAVE_REGCOMP - long i; - if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) - return __FAIL; - if (reg_start() >= 0) { - long pos = reg_pos(i), end = reg_end(i); - char *s, *t; - if (pos < 0 || end < 0) - s = strdup(""); - else if (!(s = malloc(end-pos+1))) - return 
__ERROR; - else { - strncpy(s, reg_str(i), end-pos); - s[end-pos] = 0; - } - t = sys_to_utf8(s); free(s); - return mkstr(t); - } else - return __FAIL; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regpos,argc,argv) - { - #ifdef HAVE_REGCOMP - long i; - if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) - return __FAIL; - if (reg_start() >= 0) - return mkint(reg_pos(i)); - else - return __FAIL; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regend,argc,argv) - { - #ifdef HAVE_REGCOMP - long i; - if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) - return __FAIL; - if (reg_start() >= 0) - return mkint(reg_end(i)); - else - return __FAIL; - #else - return __FAIL; - #endif - } - - FUNCTION(system,regs,argc,argv) - { - #ifdef HAVE_REGCOMP - expr x; - size_t i; - if (argc != 0) return __FAIL; - x = mknil; - i = reg_nmatches(); - while (x && i > 0) { - if (reg_pos(i) >= 0 && reg_end(i) >= 0) - x = mkcons(mkint(i), x); - i--; - } - if (x) - return x; - else - return __ERROR; - #else - return __FAIL; - #endif - } - INIT(system) { --- 6356,6359 ---- Index: clib.c =================================================================== RCS file: /cvsroot/q-lang/q/modules/clib/clib.c,v retrieving revision 1.84 retrieving revision 1.85 diff -C2 -d -r1.84 -r1.85 *** clib.c 22 Jan 2008 12:00:46 -0000 1.84 --- clib.c 23 Jan 2008 05:24:32 -0000 1.85 *************** *** 72,75 **** --- 72,87 ---- #endif + #ifdef HAVE_REGEX_H + #include <regex.h> + #endif + + #ifdef HAVE_GLOB_H + #include <glob.h> + #endif + + #ifdef HAVE_FNMATCH_H + #include <fnmatch.h> + #endif + #ifdef USE_THREADS #ifdef HAVE_SCHED_H *************** *** 102,105 **** --- 114,120 ---- #include <wctype.h> #include <iconv.h> + #include <fnmatch.h> + #include <glob.h> + #include <regex.h> #define HAVE_STRDUP 1 *************** *** 109,112 **** --- 124,131 ---- #define HAVE_MEMSET 1 + #define HAVE_FNMATCH 1 + #define HAVE_GLOB 1 + #define HAVE_REGCOMP 1 + #define 
HAVE_UNICODE 1 #define HAVE_LOCALE_H 1 *************** *** 5985,5988 **** --- 6004,6492 ---- } + /* filename globbing: *****************************************************/ + + FUNCTION(clib,fnmatch,argc,argv) + { + #ifdef HAVE_FNMATCH + char *pattern, *s; + if (argc == 2 && isstr(argv[0], &pattern) && isstr(argv[1], &s)) { + int res; + pattern = utf8_to_sys(pattern); s = utf8_to_sys(s); + if (!pattern || !s) { + if (pattern) free(pattern); if (s) free(s); + return __ERROR; + } + res = fnmatch(pattern, s, 0); + free(pattern); free(s); + if (res) + return mkfalse; + else + return mktrue; + } else + #endif + return __FAIL; + } + + FUNCTION(clib,glob,argc,argv) + { + #ifdef HAVE_GLOB + char *pattern; + if (argc == 1 && isstr(argv[0], &pattern)) { + glob_t g; + int res; + g.gl_offs = 0; + pattern = utf8_to_sys(pattern); + if (!pattern) return __ERROR; + res = glob(pattern, 0, NULL, &g); + free(pattern); + if (res == GLOB_NOMATCH) + return mknil; + else if (res) + return __FAIL; + else { + expr x = mknil; + int i = g.gl_pathc; + while (x && --i >= 0) + x = mkcons(mkstr(sys_to_utf8(g.gl_pathv[i])), x); + globfree(&g); + if (x) + return x; + else + return __ERROR; + } + } else + #endif + return __FAIL; + } + + /* regular expression matching: *******************************************/ + + #ifdef HAVE_REGCOMP + + /* regexp stack */ + + typedef struct { + unsigned done:1, global:2, matched:1; + int cflags, eflags; + regex_t rx; + regmatch_t *matches; + char *s, *p, *start; + } regstate_t; + + long regalloc = 0; + regstate_t *regstack = NULL, *regp = NULL; + char regmsg[BUFSZ]; + + #define REGALLOC 50 + + static int reg_push(void) + { + if (!regstack) + if ((regstack = malloc(REGALLOC*sizeof(regstate_t)))) { + regalloc = REGALLOC; + regp = regstack; + } else + return -1; + else if (!regp) + regp = regstack; + else if (regp-regstack+1 == regalloc) { + regstate_t *newstack = realloc(regstack, + (regalloc+REGALLOC)*sizeof(regstate_t)); + if (newstack) { + regstack = newstack; 
+ regp = regstack+regalloc; + regalloc += REGALLOC; + } else + return -1; + } else + regp++; + regp->done = regp->global = regp->matched = 0; + regp->cflags = regp->eflags = 0; + regp->matches = NULL; + regp->s = regp->p = regp->start = NULL; + return 0; + } + + static void reg_pop(void) + { + if (!regp) return; + regfree(&regp->rx); + if (regp->matches) free(regp->matches); + if (regp->s) free(regp->s); + if (regp > regstack) + regp--; + else + regp = NULL; + } + + /* push a new expression on the stack */ + + static int reg_add(char *pattern, char *s, int global, int cflags, int eflags) + { + int ret; + if (regp && regp->done) reg_pop(); + if (reg_push()) return -1; + regp->global = global; + regp->cflags = cflags; + regp->eflags = eflags; + ret = regcomp(&regp->rx, pattern, REG_EXTENDED|cflags); + *regmsg = 0; + if (ret) { + regerror(ret, &regp->rx, regmsg, BUFSZ); + reg_pop(); + return ret; + } + if (!(regp->s = strdup(s))) { + reg_pop(); + return -1; + } + regp->p = regp->s; regp->start = NULL; + if (!(regp->matches = malloc((regp->rx.re_nsub+1)*sizeof(regmatch_t)))) { + reg_pop(); + return -1; + } + return 0; + } + + /* search */ + + static int reg_flags(char *p) + { + int flags; + flags = regp->eflags; + if (p > regp->s) + if (regp->cflags & REG_NEWLINE) + if (p[-1] == '\n') + flags &= ~REG_NOTBOL; + else + flags |= REG_NOTBOL; + else + flags |= REG_NOTBOL; + return flags; + } + + static int reg_search(void) + { + int ret; + char *prev; + while (regp && regp->done && regp>regstack) reg_pop(); + if (!regp) return -1; + if (regp->matched) + /* note the beginning of the previous match */ + prev = regp->start+regp->matches[0].rm_so; + regp->start = regp->p; + if (regp->global || !regp->matched) { + ret = regexec(&regp->rx, regp->p, regp->rx.re_nsub+1, regp->matches, + reg_flags(regp->p)); + if (!ret) { + if (regp->matched) + if (regp->matches[0].rm_eo == regp->matches[0].rm_so && + regp->p == prev) + /* an extra empty match: if not at end of string then advance to the + next
position and try again; otherwise simply ignore this match + and fail */ + if (*regp->p) { + int i; + /* this cannot fail since we can always match the empty string */ + ret = regexec(&regp->rx, regp->p+1, regp->rx.re_nsub+1, + regp->matches, reg_flags(regp->p+1)); + /* translate offsets */ + for (i = 0; i <= regp->rx.re_nsub; i++) { + regp->matches[i].rm_so++; + regp->matches[i].rm_eo++; + } + } else + ret = REG_NOMATCH; + regp->matched = 1; + } + } else + ret = REG_NOMATCH; + *regmsg = 0; + if (ret) { + regp->done = 1; + regerror(ret, &regp->rx, regmsg, BUFSZ); + } else if (regp->global == 2 && + regp->matches[0].rm_eo > regp->matches[0].rm_so) + regp->p += regp->matches[0].rm_so+1; + else + regp->p += regp->matches[0].rm_eo; + return ret; + } + + /* stop search */ + + static void reg_done(void) + { + if (regp) { + regp->start = regp->p; + regp->done = 1; + } + } + + /* return matches */ + + static size_t reg_nmatches(void) + { + if (regp) + return regp->rx.re_nsub; + else + return 0; + } + + static long reg_start(void) + { + if (regp && regp->start) + return regp->start-regp->s; + else + return -1; + } + + static char *reg_skipstr(void) + { + if (regp && regp->start) + return regp->start; + else + return NULL; + } + + static long reg_pos(int i) + { + if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) + if (!regp->done && regp->matches[i].rm_so >= 0) + return regp->start+regp->matches[i].rm_so-regp->s; + else + return -1; + else + return -1; + } + + static long reg_end(int i) + { + if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) + if (!regp->done && regp->matches[i].rm_eo >= 0) + return regp->start+regp->matches[i].rm_eo-regp->s; + else + return -1; + else + return -1; + } + + static char *reg_str(int i) + { + if (regp && regp->start && 0 <= i && i <= regp->rx.re_nsub) + if (!regp->done && regp->matches[i].rm_so >= 0) + return regp->start+regp->matches[i].rm_so; + else + return NULL; + else + return NULL; + } + + #endif + + /* interface functions
*/ + + FUNCTION(clib,regmatch,argc,argv) + { + #ifdef HAVE_REGCOMP + char *opts, *regex, *s; + int cflags = 0, eflags = 0, global = 0, ret; + if (argc != 3 || !isstr(argv[0], &opts) || !isstr(argv[1], &regex) || + !isstr(argv[2], &s)) + return __FAIL; + while (*opts) + switch (*(opts++)) { + case 'g': + if (!global) global = 1; + break; + case 'G': + global = 2; + break; + case 'i': + cflags |= REG_ICASE; + break; + case 'n': + cflags |= REG_NEWLINE; + break; + case '^': + eflags |= REG_NOTBOL; + break; + case '$': + eflags |= REG_NOTEOL; + break; + default: + return __FAIL; + } + regex = utf8_to_sys(regex); s = utf8_to_sys(s); + if (!regex || !s) { + if (regex) free(regex); if (s) free(s); + return __ERROR; + } + ret = reg_add(regex, s, global, cflags, eflags); + free(regex); free(s); + if (ret == -1) + return __ERROR; + else if (ret) + return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); + ret = reg_search(); + if (ret == -1 || ret == REG_NOMATCH) + return mkfalse; + else if (ret) + return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); + else + return mktrue; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regnext,argc,argv) + { + #ifdef HAVE_REGCOMP + int ret; + if (argc != 0) return __FAIL; + ret = reg_search(); + if (ret == -1 || ret == REG_NOMATCH) + return mkfalse; + else if (ret) + return mkapp(mksym(sym(regerr)), mkstr(sys_to_utf8(regmsg))); + else + return mktrue; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regdone,argc,argv) + { + #ifdef HAVE_REGCOMP + if (argc != 0) return __FAIL; + reg_done(); + return mkvoid; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regstart,argc,argv) + { + #ifdef HAVE_REGCOMP + long start; + if (argc != 0) return __FAIL; + start = reg_start(); + if (start >= 0) + return mkint(start); + else + return __FAIL; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regskip,argc,argv) + { + #ifdef HAVE_REGCOMP + char *skip; + if (argc != 0) return __FAIL; + if ((skip = reg_skipstr())) { +
long start = reg_start(), pos = reg_pos(0); + char *s, *t; + if (pos >= start) + s = malloc(pos-start+1); + else + s = malloc(strlen(skip)+1); + if (!s) return __ERROR; + if (pos >= start) { + strncpy(s, skip, pos-start); + s[pos-start] = 0; + } else + strcpy(s, skip); + t = sys_to_utf8(s); free(s); + return mkstr(t); + } else + return __FAIL; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,reg,argc,argv) + { + #ifdef HAVE_REGCOMP + long i; + if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) + return __FAIL; + if (reg_start() >= 0) { + long pos = reg_pos(i), end = reg_end(i); + char *s, *t; + if (pos < 0 || end < 0) + s = strdup(""); + else if (!(s = malloc(end-pos+1))) + return __ERROR; + else { + strncpy(s, reg_str(i), end-pos); + s[end-pos] = 0; + } + t = sys_to_utf8(s); free(s); + return mkstr(t); + } else + return __FAIL; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regpos,argc,argv) + { + #ifdef HAVE_REGCOMP + long i; + if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) + return __FAIL; + if (reg_start() >= 0) + return mkint(reg_pos(i)); + else + return __FAIL; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regend,argc,argv) + { + #ifdef HAVE_REGCOMP + long i; + if (argc != 1 || !isint(argv[0], &i) || i < 0 || i > reg_nmatches()) + return __FAIL; + if (reg_start() >= 0) + return mkint(reg_end(i)); + else + return __FAIL; + #else + return __FAIL; + #endif + } + + FUNCTION(clib,regs,argc,argv) + { + #ifdef HAVE_REGCOMP + expr x; + size_t i; + if (argc != 0) return __FAIL; + x = mknil; + i = reg_nmatches(); + while (x && i > 0) { + if (reg_pos(i) >= 0 && reg_end(i) >= 0) + x = mkcons(mkint(i), x); + i--; + } + if (x) + return x; + else + return __ERROR; + #else + return __FAIL; + #endif + } + /* initialization: ********************************************************/ |