From: Ken A. <kan...@bb...> - 2003-10-02 13:31:41
|
This is an interesting approach, however it assumes that each field is followed by a delimiter. In the CSV format that EXCEL uses, the end of a field is also indicated by the end of line. Also, in EXCEL, a field that contains a delimiter will be wrapped in double quotes, like "this, and that", and a double quote is escaped by doubling it. Here's an approach I use: (define (csv-read port delimiter cell-action row-action) (define (!) (let ((c (read-char port))) c)) (define k1 (lambda () (state (!)))) (define k2 (lambda () (row-action k1))) (define (give-cell b k) (cell-action (list->string (reverse b)) k)) (define (state c) (cond ((eqv? c delimiter) (cell-action "" k1)) ((eqv? c #\") (state-string (!) '())) ((eqv? c #\newline) (row-action k1)) ((eof-object? c) #t) (else (state-any c '())))) (define (state-string c b) (cond ((eqv? c #\") (state-string-quote (!) b)) ((not (eof-object? c)) (state-string (!) (cons c b))))) (define (state-string-quote c b) (cond ((eqv? c #\") (state-string c (cons c b))) ; Escaped double quote. ((eqv? c delimiter) (give-cell b k1)) ((eqv? c #\newline) (give-cell b k2)) ((eof-object? c) (give-cell b k2)) (else (error "Single double quote at unexpected place.")))) (define (state-any c b) (cond ((eqv? c delimiter) (give-cell b k1)) ((eqv? c #\newline) (give-cell b k2)) ((eof-object? c) (give-cell b k2)) (else (state-any (!) (cons c b))))) (state (!))) This uses continuation passing style to separate the parsing from what the user does with each cell and row. (cell-action value k) is called with a value of the next cell and a continuation, k, to resume the computation. (row-action k) is called at the end of a row, also with a continuation. The state... procedures are a tail recursive finite state machine. Here's an example of converting a csv file to a string of HTML: (define (csv->html port) (let ((result '("<html><table><tr>"))) (csv-read port #\, (lambda (value k) (set!
result (cons "</td>" (cons value (cons "<td>" result)))) (k)) (lambda (k) (set! result (cons "</tr><tr>" result)) (k))) (apply string-append (reverse (cons "</html>" result))))) k At 11:23 PM 9/30/2003 +0200, Wolfgang Jaehrling wrote: >Hi there! > >For those of you who want to read some interesting code, here is a >program to parse a file in CSV (Comma Separated Value) format. I >think it shows how one should use Scheme, but some might say it goes a >bit too far... (and I'd like to receive comments on this topic.) > >Note `READ-TABLE' can be called with the source port as argument, or >without an argument to use the current input port. > >;; Reading a table from a port where it resides in CSV format. >;; Copyright (C) 2003 Wolfgang Jährling <wol...@pr...> >;; >;; This program is free software; you can redistribute it and/or modify >;; it under the terms of the GNU General Public License as published by >;; the Free Software Foundation; either version 2 of the License, or >;; (at your option) any later version. >;; >;; This program is distributed in the hope that it will be useful, >;; but WITHOUT ANY WARRANTY; without even the implied warranty of >;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >;; GNU General Public License for more details. > >(define field-delimiter #\,) > >;; Return a procedure that calls CONSUMER with three arguments: The >;; value returned by the PRODUCER applied to the procedures arguments, >;; a list that is initially empty, and a thunk to restart this process >;; with the value given by the PRODUCER added at the beginning of the >;; list given to the CONSUMER. >(define (collectrec producer consumer) > (lambda args > (letrec ((loop (lambda (lst) > (let ((x (apply producer args))) > (consumer x lst (lambda () > (loop (cons x lst)))))))) > (loop '())))) > >;; Read and return a field, that ends with the configured delimiter >;; character, or return false at the end of a line, or the eof-object >;; at end of file.
>(define read-field > (collectrec read-char > (lambda (c chars loop) > (cond ((eof-object? c) c) > ((char=? c field-delimiter) > (apply string (reverse chars))) > ((char=? c #\newline) #f) > (else (loop)))))) > >;; Read a line and split it up into a list of fields which gets >;; returned, or false at the end of the file. >(define read-row > (collectrec read-field > (lambda (f fields loop) > (cond ((not f) (reverse fields)) > ((eof-object? f) #f) > (else (loop)))))) > >;; Read a table and return it as a list of rows, each row being a list >;; of fields, which are strings. >(define read-table > (collectrec read-row > (lambda (r rows loop) > (if (not r) > (reverse rows) > (loop))))) > >;;;; End of code. ;;;; > >Cheers, >GNU/Wolfgang > >-- >(define eq? (lambda (x y) #t)) ;; How could it be otherwise? > > >_______________________________________________ >Guile-user mailing list >Gui...@gn... >http://mail.gnu.org/mailman/listinfo/guile-user |