Thread: [q-lang-cvs] q-csv csv.c,1.14,1.15 csv.q,1.14,1.15
Brought to you by:
agraef
From: RER <ed...@us...> - 2008-01-25 22:09:53
|
Update of /cvsroot/q-lang/q-csv In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv12095 Modified Files: csv.c csv.q Log Message: code cleanups suggested by Albert Index: csv.c =================================================================== RCS file: /cvsroot/q-lang/q-csv/csv.c,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** csv.c 24 Jan 2008 22:26:07 -0000 1.14 --- csv.c 25 Jan 2008 22:09:46 -0000 1.15 *************** *** 1,3 **** ! /* CSV according to RFC 4180 (http://tools.ietf.rg/html/rfc4180.txt) This is free software; you can redistribute it and/or --- 1,4 ---- ! /* This file contains CSV reading and writing functions loosely based on ! Python's csv module (http://docs.python.org/lib/module-csv.html) This is free software; you can redistribute it and/or *************** *** 17,21 **** $Id$ ! Written by Eddie Rucker 3-22 Jan, 2008 */ #include <stdio.h> --- 18,22 ---- $Id$ ! Written by Eddie Rucker 3-25 Jan, 2008 */ #include <stdio.h> *************** *** 25,37 **** MODULE(csv); #define BSIZE 512 ! enum {FORCEQUOTE, AUTOQUOTE, NOQUOTE}; enum {CSV_DELIMITER, CSV_ESCAPE, CSV_QUOTE, CSV_QUOTING, CSV_LINETERMINATOR, CSV_SKIPSPACE}; ! /* fread_block reads embbeded '\n's. Does not account for badly formatted records. */ - FUNCTION (csv, fread_csvstr, argc, argv) { --- 26,46 ---- MODULE(csv); + static int quoting_style; + + INIT(csv) + { + quoting_style = sym(csv_quote_all); + } + #define BSIZE 512 ! enum {CSV_DELIMITER, CSV_ESCAPE, CSV_QUOTE, CSV_QUOTING, CSV_LINETERMINATOR, CSV_SKIPSPACE}; ! enum {CSV_QUOTE_ALL, CSV_QUOTE_STRINGS, CSV_QUOTE_NONE}; ! ! /* fread_csvstr reads embbeded '\n's. Does not account for badly formatted records. */ FUNCTION (csv, fread_csvstr, argc, argv) { *************** *** 91,95 **** double d; char *p, *t; ! if (cvt_f) { i = strtol(s, &p, 0); --- 100,104 ---- double d; char *p, *t; ! if (cvt_f) { i = strtol(s, &p, 0); *************** *** 100,128 **** return mkfloat(d); } ! if (!(t = strdup(s))) ! 
return NULL; ! return mkstr(t); } ! #define putfld(len) \ ! if (n_fld + len >= fld_sz) { \ ! if (!(tfld = (char *)realloc(fld, fld_sz <<= 1))) \ ! goto done; \ ! fld = tfld; \ ! } \ ! fldp = fld + n_fld; \ ! strncpy(fldp, s, len); \ ! n_fld += len \ ! /* qt == 0 forces the field to be quoted */ ! #define putrec(qt) \ ! *(fldp + 1) = 0; \ ! if (n_rec >= rec_sz - 1) { \ ! if (!(trec = (expr *)realloc(rec, (rec_sz += 64)*sizeof(expr)))) \ ! goto done; \ ! rec = trec; \ ! } \ ! if ((rec[n_rec++] = convert(fld, qt)) == NULL) \ goto done --- 109,135 ---- return mkfloat(d); } ! return !(t = strdup(s)) ? NULL : mkstr(t); } ! #define putfld(len) \ ! if (n_fld + len >= fld_sz) { \ ! if (!(tfld = (char *)realloc(fld, fld_sz <<= 1))) \ ! goto done; \ ! fld = tfld; \ ! } \ ! fldp = fld + n_fld; \ ! strncpy(fldp, s, len); \ ! n_fld += len \ ! /* qt == 0 forces the field to be quoted */ ! #define putrec(qt) \ ! *(fldp + 1) = 0; \ ! if (n_rec >= rec_sz - 1) { \ ! if (!(trec = (expr *)realloc(rec, (rec_sz += 64)*sizeof(expr)))) \ ! goto done; \ ! rec = trec; \ ! } \ ! if ((rec[n_rec++] = convert(fld, qt)) == NULL) \ goto done *************** *** 132,136 **** arg[0]: (Conversion flag, field delimeter char, string delimeter char) arg[1]: CSV formatted string ! output: tuple of fields --- 139,143 ---- arg[0]: (Conversion flag, field delimeter char, string delimeter char) arg[1]: CSV formatted string ! output: tuple of fields *************** *** 140,145 **** Notes: ! \r char is treated as white space except inside "" */ ! FUNCTION(csv, csvstr_to_tuple, argc, argv) { --- 147,152 ---- Notes: ! \r char is treated as white space except inside "" ! */ FUNCTION(csv, csvstr_to_tuple, argc, argv) { *************** *** 151,155 **** expr *xs, *rec, *trec; char *sp; ! if (argc != 2 || !istuple(argv[0], &n, &xs) --- 158,162 ---- expr *xs, *rec, *trec; char *sp; ! 
if (argc != 2 || !istuple(argv[0], &n, &xs) *************** *** 158,162 **** || !isstr(xs[CSV_ESCAPE], &escape) || !isstr(xs[CSV_QUOTE], &quote) - || !isint(xs[CSV_QUOTING], &quoting) || !isstr(xs[CSV_LINETERMINATOR], &lineterm) || !isbool(xs[CSV_SKIPSPACE], &skipspace_f) --- 165,168 ---- *************** *** 164,170 **** return __FAIL; if (!(fld = (char *)malloc(fld_sz))) return __ERROR; ! if (!(rec = (expr *)malloc(rec_sz*sizeof(expr)))) { free(fld); --- 170,180 ---- return __FAIL; + if ((quoting = exprsym(xs[CSV_QUOTING]) - quoting_style) < CSV_QUOTE_ALL + || quoting > CSV_QUOTE_NONE) + return __FAIL; + if (!(fld = (char *)malloc(fld_sz))) return __ERROR; ! if (!(rec = (expr *)malloc(rec_sz*sizeof(expr)))) { free(fld); *************** *** 185,189 **** if (!strncmp(s, delimiter, n_delimiter)) { *fldp = 0; ! putrec(FORCEQUOTE); s += n_delimiter; } else if (!strncmp(s, quote, n_quote)) { --- 195,199 ---- if (!strncmp(s, delimiter, n_delimiter)) { *fldp = 0; ! putrec(CSV_QUOTE_ALL); s += n_delimiter; } else if (!strncmp(s, quote, n_quote)) { *************** *** 210,214 **** sprintf(errmsg, "Column %d: Expected {%s}.", n_fld+1, quote); st = 20; ! } else if (!strncmp(s, escape, n_escape)) { s += n_escape; putfld(1); --- 220,224 ---- sprintf(errmsg, "Column %d: Expected {%s}.", n_fld+1, quote); st = 20; ! } else if (!strncmp(s, escape, n_escape)) { s += n_escape; putfld(1); *************** *** 225,238 **** st = 1; } else if (!strncmp(s, delimiter, n_delimiter)) { ! putrec(FORCEQUOTE); s += n_delimiter; st = 0; } else if (!*s || *s == EOF || !strncmp(s, lineterm, n_lineterm)) { ! putrec(FORCEQUOTE); st = 10; } else if (isspace(*s)) { ++s; st = 3; ! } else { sprintf(errmsg, "Column %d: Expected {%s}.", n_fld+1, delimiter); st = 20; --- 235,248 ---- st = 1; } else if (!strncmp(s, delimiter, n_delimiter)) { ! putrec(CSV_QUOTE_ALL); s += n_delimiter; st = 0; } else if (!*s || *s == EOF || !strncmp(s, lineterm, n_lineterm)) { !
putrec(CSV_QUOTE_ALL); st = 10; } else if (isspace(*s)) { ++s; st = 3; ! } else { sprintf(errmsg, "Column %d: Expected {%s}.", n_fld+1, delimiter); st = 20; *************** *** 241,249 **** case 3: if (!strncmp(s, delimiter, n_delimiter)) { ! putrec(FORCEQUOTE); s += n_delimiter; st = 0; } else if (!*s || *s == '\n' || *s == EOF) { ! putrec(FORCEQUOTE); st = 10; } else if (isspace(*s)) { --- 251,259 ---- case 3: if (!strncmp(s, delimiter, n_delimiter)) { ! putrec(CSV_QUOTE_ALL); s += n_delimiter; st = 0; } else if (!*s || *s == '\n' || *s == EOF) { ! putrec(CSV_QUOTE_ALL); st = 10; } else if (isspace(*s)) { *************** *** 279,319 **** } } ! done: ! free(fld); ! if (st == 10) { ! rec = realloc(rec, sizeof(expr)*n_rec); ! return mktuplev(n_rec, rec); ! } else { ! for (n = 0; n < n_rec; ++n) ! dispose(rec[n]); ! free(rec); ! if (st == 20) ! return mkapp(mksym(sym(csv_error)), mkstr(strdup(errmsg))); ! return __ERROR; ! } } ! #define resize_str \ ! if (len > sz) { \ ! if (!(ts = (char *)realloc(s, sz <<= 1))) { \ ! free(s); \ ! return __ERROR; \ ! } \ ! s = ts; \ ! } \ t = s + mrk ! #define insert \ ! mrk = len; \ ! len += strlen(tb); \ ! resize_str; \ strncpy(t, tb, len - mrk) /* convert tuple to csv string ! input: arg[0]: (Conversion flag, field delimeter char, string delimeter char) arg[1]: tuple to be converted ! output: CSV formatted string --- 289,327 ---- } } ! done: ! free(fld); ! if (st == 10) ! return mktuplev(n_rec, realloc(rec, sizeof(expr)*n_rec)); ! else { ! for (n = 0; n < n_rec; ++n) ! dispose(rec[n]); ! free(rec); ! return st==20 ? mkapp(mksym(sym(csv_error)), mkstr(strdup(errmsg))) ! : __ERROR; ! } } ! #define resize_str \ ! if (len > sz) { \ ! if (!(ts = (char *)realloc(s, sz <<= 1))) { \ ! free(s); \ ! return __ERROR; \ ! } \ ! s = ts; \ ! } \ t = s + mrk ! #define insert \ ! mrk = len; \ ! len += strlen(tb); \ ! resize_str; \ strncpy(t, tb, len - mrk) /* convert tuple to csv string ! 
input: arg[0]: (Conversion flag, field delimeter char, string delimeter char) arg[1]: tuple to be converted ! output: CSV formatted string *************** *** 323,328 **** Notes: ! \r char is treated as white space except inside "" */ ! FUNCTION (csv, tuple_to_csvstr, argc, argv) { --- 331,336 ---- Notes: ! \r char is treated as white space except inside "" ! */ FUNCTION (csv, tuple_to_csvstr, argc, argv) { *************** *** 342,355 **** || !isstr(ys[CSV_ESCAPE], &escape) || !isstr(ys[CSV_QUOTE], &quote) - || !isint(ys[CSV_QUOTING], &quoting) || !isstr(ys[CSV_LINETERMINATOR], &lineterm) || !isbool(ys[CSV_SKIPSPACE], &skipspace_f) ! || !istuple(argv[1], &n, &xs)) { return __FAIL; - } if (!(s = (char *)malloc(sz))) return __ERROR; ! n_escape = strlen(escape); n_quote = strlen(quote); --- 350,365 ---- || !isstr(ys[CSV_ESCAPE], &escape) || !isstr(ys[CSV_QUOTE], &quote) || !isstr(ys[CSV_LINETERMINATOR], &lineterm) || !isbool(ys[CSV_SKIPSPACE], &skipspace_f) ! || !istuple(argv[1], &n, &xs)) ! return __FAIL; ! ! if ((quoting = exprsym(ys[CSV_QUOTING]) - quoting_style) < CSV_QUOTE_ALL ! || quoting > CSV_QUOTE_NONE) return __FAIL; if (!(s = (char *)malloc(sz))) return __ERROR; ! n_escape = strlen(escape); n_quote = strlen(quote); *************** *** 374,432 **** lineterm_cnt = 0; p = sval; ! if (skipspace_f && quoting == NOQUOTE) ! while (isspace(*p) && strncmp(p, quote, n_delimiter) && strncmp(p, delimiter, n_delimiter) ! && strncmp(p, lineterm, n_lineterm)) { ++p; } ! k = p - sval; ! mrk = len; while (*p) { if (!strncmp(p, quote, n_quote)) { ! ++quote_cnt; p += n_quote; ! len += n_escape + n_quote; ! } else if (!strncmp(p, delimiter, n_delimiter)) { ! ++delim_cnt; ! p += n_delimiter; ! len += n_delimiter; ! } else if (!strncmp(p, lineterm, n_lineterm)) { ! ++lineterm_cnt; ! p += n_lineterm; ! len += n_lineterm; ! } else { ! ++len; ! ++p; ! } ! } ! len += n_delimiter; ! p = sval + k; ! if (quoting == NOQUOTE && !(quote_cnt + delim_cnt + lineterm_cnt)) { ! resize_str; !
k = len-mrk-1; ! strncpy(t, p, k); ! t += k; ! } else { ! /* Add space for surrounding quotes */ ! len += n_quote << 1; ! resize_str; ! strncpy(t, quote, n_quote); ! t += n_quote; ! while (*p) { ! if (!strncmp(p, quote, n_quote)) { ! strncpy(t, escape, n_escape); ! t += n_escape; ! strncpy(t, quote, n_quote); ! t += n_quote; ! p += n_quote; ! } else ! *t++ = *p++; ! } ! strncpy(t, quote, n_quote); ! t += n_quote; } ! strncpy(t, delimiter, n_delimiter); ! t += n_delimiter; } else { sprintf(errmsg, "Field %d: Invalid conversion type.", i+1); --- 384,441 ---- lineterm_cnt = 0; p = sval; ! if (skipspace_f && quoting == CSV_QUOTE_NONE) ! while (isspace(*p) && strncmp(p, quote, n_delimiter) && strncmp(p, delimiter, n_delimiter) ! && strncmp(p, lineterm, n_lineterm)) ! ++p; ! k = p - sval; ! mrk = len; ! while (*p) { ! if (!strncmp(p, quote, n_quote)) { ! ++quote_cnt; ! p += n_quote; ! len += n_escape + n_quote; ! } else if (!strncmp(p, delimiter, n_delimiter)) { ! ++delim_cnt; ! p += n_delimiter; ! len += n_delimiter; ! } else if (!strncmp(p, lineterm, n_lineterm)) { ! ++lineterm_cnt; ! p += n_lineterm; ! len += n_lineterm; ! } else { ! ++len; ++p; } ! } ! len += n_delimiter; ! p = sval + k; ! if (quoting == CSV_QUOTE_NONE && !(quote_cnt+delim_cnt+lineterm_cnt)) { ! resize_str; ! k = len - mrk - 1; ! strncpy(t, p, k); ! t += k; ! } else { ! /* Add space for surrounding quotes */ ! len += n_quote << 1; ! resize_str; ! strncpy(t, quote, n_quote); ! t += n_quote; while (*p) { if (!strncmp(p, quote, n_quote)) { ! strncpy(t, escape, n_escape); ! t += n_escape; ! strncpy(t, quote, n_quote); ! t += n_quote; p += n_quote; ! } else ! *t++ = *p++; } ! strncpy(t, quote, n_quote); ! t += n_quote; ! } ! strncpy(t, delimiter, n_delimiter); ! t += n_delimiter; } else { sprintf(errmsg, "Field %d: Invalid conversion type.", i+1); *************** *** 437,446 **** len += n_lineterm; resize_str; ! strcpy(t,lineterm); ! if (!(t = strdup(s))) { ! free(s); ! return __ERROR; ! } ! free(s); ! 
return mkstr(t); } --- 446,450 ---- len += n_lineterm; resize_str; ! strcpy(t, lineterm); ! return mkstr((char *)realloc(s, len+1)); } Index: csv.q =================================================================== RCS file: /cvsroot/q-lang/q-csv/csv.q,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** csv.q 24 Jan 2008 22:26:07 -0000 1.14 --- csv.q 25 Jan 2008 22:09:47 -0000 1.15 *************** *** 22,46 **** Python's csv module (http://docs.python.org/lib/module-csv.html) */ - public extern fread_csvstr FILE QUOTE; - public extern tuple_to_csvstr ARGS REC; - public extern csvstr_to_tuple ARGS STR; - - public csv_error MSG; - /* User may define csv_error for custom error handling. */ - from dict import dict, insert, vals, member; ! public const var ! csv_delimiter = 0, ! csv_escape = 1, ! csv_quote = 2, ! csv_quoting = 3, ! csv_lineterminator = 4, ! csv_skipspace = 5; ! public const var ! csv_quote_all = 0, ! csv_quote_strings = 1, ! csv_quote_none = 2; /* Dialect Options --- 22,33 ---- Python's csv module (http://docs.python.org/lib/module-csv.html) */ from dict import dict, insert, vals, member; ! private extern fread_csvstr FILE QUOTE; ! private extern tuple_to_csvstr ARGS REC; ! private extern csvstr_to_tuple ARGS STR; ! /* User may define csv_error for custom error handling. */ ! public csv_error MSG; /* Dialect Options *************** *** 66,70 **** csv_skipspace: Skip white space flag. Defaults to true. Reading/Writing: If true, white spaces before fields are removed. ! Quoted fields always retain white space. */ /* QuoteStyle constants used by csv quoting. --- 53,60 ---- csv_skipspace: Skip white space flag. Defaults to true. Reading/Writing: If true, white spaces before fields are removed. ! Quoted fields always retain white space. ! */ ! public type DialectOption = const csv_delimiter, csv_escape, csv_quote, csv_quoting, ! csv_lineterminator, csv_skipspace; /* QuoteStyle constants used by csv quoting. 
*************** *** 72,78 **** csv_quote_strings: Quote only strings fields, numeric fields are not quoted. csv_quote_none: Only fields containing embedded field delimeters, line ! terminators, or escaped quotes are quoted. */ ! /* Defaults to RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) */ def DEFAULTS = dict [csv_delimiter, ","; --- 62,70 ---- csv_quote_strings: Quote only strings fields, numeric fields are not quoted. csv_quote_none: Only fields containing embedded field delimeters, line ! terminators, or escaped quotes are quoted. ! */ ! public type QuoteStyle = const csv_quote_all, csv_quote_strings, csv_quote_none; ! /* Defaults are set to RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) */ def DEFAULTS = dict [csv_delimiter, ","; *************** *** 145,149 **** public freadcsv ARGS; freadcsv (F:File, Dialect:Tuple) ! = csvstr_to_tuple Dialect $ fread_csvstr F (Dialect!csv_quote); freadcsv F:File = csvstr_to_tuple RFC4180 $ fread_csvstr F "\""; --- 137,141 ---- public freadcsv ARGS; freadcsv (F:File, Dialect:Tuple) ! = csvstr_to_tuple Dialect $ fread_csvstr F (Dialect!ord csv_quote); freadcsv F:File = csvstr_to_tuple RFC4180 $ fread_csvstr F "\""; |