Hi!
Johannes,
I didn't know about this improved driver - thank you for your email.
I see that you implemented loading from csv - which is helpful but
not enough for all my use-cases. So I would probably start by creating
a slightly different version that loads from other sources...
Do you want me to include your code in bedlam unchanged together
with the patched PostgreSQL-JDBC-Driver (if that is the case please
send me the entire code and let me know the license - GPL2/MIT/etc are
all good enough for inclusion)? Or is there anything else that would
need to be implemented before that?
I will do my best to help - but just let me know if I understood
your request correctly
Best regards,
Igor.
On Thu, Apr 8, 2010 at 06:45, Johannes Bruegmann <joh...@jo...> wrote:
> Hello,
>
> may I ask wether there exists a "wishlist" for bedlam? If so, may I
> ask for the following feature?
>
> There is a special PostgreSQL-JDBC-Driver at
>
> http://kato.iki.fi/sw/db/postgresql/jdbc/copy/
>
> I use the JDBC3-Postresql 8.1+8.3 Version.
>
> It uses the COPY from/into feature of PostgreSQL. Since INSERT can be
> very slow for large data, this is a very efficient interface to copy
> data into db or from db. From what i know, this driver only extends
> the jdbc-interface and does no further changes. So it should be
> compliant to the other stuff.
>
> I have written some poor man code for that driver (maybe some parts
> are missing here, it is here just for illustration -- if wished, i can
> distribute it in full for writing a nicer interface or the like).
>
> The READ-TABLE and COPY-SELECT functionality are a little bit limited,
> because they depend on procedures MAKE-CSV-READER and CSV->LIST which
> are taken from Neil Van Dyke's cvs-scm (see here[tm]:
> http://www.neilvandyke.org/csv-scheme/) utilities (version at date of
> writing was 0.5), but taken all that into account COPY-SELECT (should
> be called make-copy-select) often is still faster than using normal
> JDBC-SELECT:
>
> (let* ((qrows "SELECT count(*)::INT FROM points")
> (convrows (lambda (access-field) (access-field 1)))
> (q "SELECT lcd, clid, tcd, stcd, junctionnumber as jno, rnid, n1id, n2id, seg_lcd FROM points")
> (conv (lambda (access-field)
> (let ((lcd (access-field "lcd"))
> (clid (access-field "clid"))
> (tcd (access-field "tcd"))
> (stcd (access-field "stcd"))
> (jno (access-field "jno"))
> (rnid (access-field "rnid"))
> (n1id (access-field "n1id"))
> (n2id (access-field "n2id"))
> (seg-lcd (access-field "seg_lcd")))
> (list lcd clid tcd stcd jno rnid n1id n2id seg-lcd))))
> (select-rows (jdbc/make-sql-select qrows convrows))
> (select-points1 (jdbc/make-sql-select q conv))
> (select-points2 (copy-select q conv)))
> (jdbc/with-connection (db-connect)
> (lambda (conn)
> (let ((rows (select-rows conn))
> (t1 (cadr (time (select-points1 conn))))
> (t2 (cadr (time (select-points2 conn)))))
> (display "Number of rows: ") (display (car rows)) (newline)
> (display (format "JDBC-SELECT: ~a ms\nCOPY-SELECT: ~a ms\n" (car t1) (car t2)))))))
>
> ===>
>
> Number of rows: 28531
> JDBC-SELECT: 23966 ms
> COPY-SELECT: 13352 ms
>
> The WRITE-TABLE stuff should be many times faster than INSERT.
>
> All this is only a suggestion, "wishes and hopes". Feel free to ignore
> it silently. Or ask if there is something missing below.
>
> Thank you for the bedlam stuff.
>
> Regards,
> Johannes Brügmann
>
> ------------------------------------------------------------------------------
> Code:
>
> (define-java-classes
> <java.sql.connection>
> <java.sql.prepared-statement>
> <java.sql.result-set>
> <java.sql.driver-manager>
> <java.sql.types>
> <org.postgresql.copy.copy-manager>)
>
> ;;;@args : proc -> conn -> (proc conn)
> ;;;taken from siscweb/jdbc
> ;;;executes (proc conn) inside an transaction
> (define (jdbc/call-with-transaction proc)
> (lambda (conn)
> (let ((jac (get-auto-commit conn)))
> (dynamic-wind
> (lambda ()
> (set-auto-commit conn (->jboolean #f)))
> (lambda ()
> (with/fc
> (lambda (m e)
> (rollback conn)
> (throw m e))
> (lambda ()
> (let ((result (proc conn)))
> (commit conn)
> result))))
> (lambda ()
> (set-auto-commit conn jac))))))
>
> ;;;@args : query input-port -> conn -> number-of-rows-affected
> ;;;copy bulk data into a database object using @code{COPY}
> ;;;the driver has to support that feature, for PostgreSQL there exists a patched jar-file
> ;;;if PostgreSQL server version < 8.2 this procedure returns @code{-1} otherwise the number of copied rows
> (define (jdbc/copy-into-db query input-port)
> (lambda (conn)
> (let ((jinp (->jinput-stream input-port))
> (jsql (->jstring query)))
> (let ((call-with-transaction
> (jdbc/call-with-transaction (lambda (conn)
> (let* ((cm (java-new <org.postgresql.copy.copy-manager> conn))
> (num-of-rows-copied (copy-into-db cm jsql jinp)))
> (->number num-of-rows-copied))))))
> (call-with-transaction conn)))))
>
> ;;;@args : query output-port -> conn -> number-of-rows-copied
> ;;;copy bulk data out of a database object using @code{COPY}
> ;;;the driver has to support that feature, for PostgreSQL there exists a patched jar-file
> ;;;if PostgreSQL server version < 8.2 this procedure returns @code{-1} otherwise the number of copied rows
> (define (jdbc/copy-from-db query output-port)
> (lambda (conn)
> (let ((joutp (->joutput-stream output-port))
> (jsql (->jstring query)))
> (let ((call-with-transaction
> (jdbc/call-with-transaction (lambda (conn)
> (let* ((cm (java-new <org.postgresql.copy.copy-manager> conn))
> (num-of-rows-copied (copy-from-db cm jsql joutp)))
> (->number num-of-rows-copied))))))
> (call-with-transaction conn)))))
>
> ;;;@body
> ;;;code token from @uref{http://sisc.cvs.sourceforge.net/sisc/contrib/pure-scheme/jdbc.scm}
> (define (list-index/copy-select ls e)
> (cond [(null? ls) #f]
> [(equal? (car ls) e) 0]
> [else (+ 1 (list-index/copy-select (cdr ls) e))]))
>
> ;;;@args : delim qmark -> list-of-list-of-fields -> input-port
> ;;;generates a @code{write-csv}-procedure which takes a list of list
> ;;;of fields converts and concats them into a string, and displays
> ;;;that string on an buffered-output-port. After the buffer is filled
> ;;;with all lines it is opened for read and the input port is returned.
> (define (make-write-csv delim qmark)
> (let ((write-csv-line (make-write-csv-line delim qmark)))
> (lambda (list-of-list-of-fields)
> (let ((dump-lines (lambda (output-port)
> (let ((char-output-port (open-character-output-port output-port)))
> (for-each (lambda (fields)
> (let ((line (write-csv-line fields)))
> (display line char-output-port)
> (newline char-output-port)))
> list-of-list-of-fields)
> (display "\\." char-output-port)
> (newline char-output-port)
> (flush-output-port char-output-port))
> output-port)))
> (let ((buffer (call-with-output-buffer dump-lines)))
> (let ((inp (open-input-buffer buffer)))
> inp))))))
>
> ;;;args : table-expr list-of-list-of-fields -> connection -> num-of-rows-affected
> ;;;Writes @var{list-of-list-of-fiels} linewise into
> ;;;@var{table-expr}. Upon successful completion the number of rows
> ;;;written is returned (see @code{jdbc/copy-into-db}).
> (define (write-table table-expr list-of-list-of-fields)
> (lambda (conn)
> (let ((query (string-append "COPY " table-expr " FROM STDIN WITH NULL AS 'SQL-NULL' CSV DELIMITER ';' QUOTE '\"'"))
> (write-csv (make-write-csv ";" "\"")))
> (let ((input-port (write-csv list-of-list-of-fields)))
> (let ((really-copy (jdbc/copy-into-db query input-port)))
> (really-copy conn))))))
>
> (define (make-read-csv-row delim qmark)
> (lambda (inp)
> (make-csv-reader inp `((separator-chars . (,(car (string->list delim))))
> (quote-char . ,(car (string->list qmark)))))))
>
> ;;;@args : delim qmark -> output-port -> list-of-list-of-fields
> ;;;generates a @code{read-csv}-procedure which takes an
> ;;;@code{buffered-output-port} as argument that receives the buffer of
> ;;;the attached @code{buffered-output-port}, opens an
> ;;;@code{buffered-input-port} upon that buffer, reads this buffer
> ;;;linewise for csv-data, returns each line parsed into fields as list
> ;;;of fields, and returns all lines as resulting list.
> (define (make-read-csv delim qmark)
> (let ((read-csv-row (make-read-csv-row delim qmark)))
> (lambda (output-port)
> (let ((dump-fields (lambda (input-port)
> (let ((char-input-port (open-character-input-port input-port)))
> (let ((next-row (read-csv-row char-input-port)))
> (csv->list next-row)))))
> (buffer (get-output-buffer output-port)))
> (let ((vals-list (call-with-input-buffer buffer dump-fields)))
> vals-list)))))
>
> ;;;@args : table-expr conv -> connection -> list-of-list-of-fields
> ;;;Reads @var{table-expr} and returns content as list of list of
> ;;;fields, where each field has type string, where @var{conv} is:
> ;;;@lisp
> ;;; conv: col-spec -> col-value
> ;;;@end lisp
> ;;;code partially derived from @uref{http://sisc.cvs.sourceforge.net/sisc/contrib/pure-scheme/jdbc.scm}
> (define (read-table table-expr conv)
> (lambda (conn)
> (let* ((tre (posix-string->regexp "^[ ]*\([a-zA-Z0-9_]\+\).*"))
> (tname (regexp-substitute/global #f tre table-expr 'pre 1 'post))
> (fre (posix-string->regexp "^[ ]*[a-zA-Z0-9_]\+[ ]*[(]\([^)]\+\)[)]"))
> (fnames (if (not (regexp-search fre table-expr))
> "*"
> (regexp-substitute/global #f fre table-expr 'pre 1 'post)))
> (query-md (string-append "SELECT " fnames " FROM " tname " LIMIT 1"))
> (stmt (jdbc/prepare-statement conn query-md #f))
> (rs (execute-query stmt))
> (md (get-meta-data rs))
> (cc (->number (get-column-count md)))
> (cn (map (lambda (i)
> (->string (get-column-label md (->jint i))))
> (cdr (iota (+ cc 1))))))
> (let ((types-conv (map (lambda (t)
> (cdr (assoc (->number (get-column-type md (->jint (+ t 1)))) type-conversions/copy-select)))
> (iota cc))))
> (let ((access-row (lambda (row)
> (lambda (field-id)
> (let* ((field-pos (cond ((number? field-id) field-id)
> ((string? field-id) (+ (list-index/copy-select cn field-id) 1))
> (else (error "columns are indexed by positive non-zero integers and by field-name strings."))))
> (conv-record (list-ref types-conv (- field-pos 1))))
> (if (or (null? conv-record) (java-null? conv-record))
> '()
> (conv-record (list-ref row (- field-pos 1)))))))))
> (let ((query (string-append "COPY " table-expr " TO STDOUT WITH NULL AS 'NULL' CSV DELIMITER ';' QUOTE '\"'"))
> (read-csv (make-read-csv ";" "\"")))
> (let ((output-port (open-output-buffer)))
> (let ((really-copy (jdbc/copy-from-db query output-port)))
> (really-copy conn)
> (let ((rows (read-csv output-port))
> (inc (lambda (x) (+ x 1))))
> (map (lambda (row)
> (conv (access-row row)))
> rows))))))))))
>
>
> ;;;@args : query conv -> connection -> list-of-list-of-fields
> ;;;Reads @var{query} and returns content as list of list of
> ;;;fields, where each field has type string, where @var{conv} is:
> ;;;@lisp
> ;;; conv: col-spec -> col-value
> ;;;@end lisp
> (define (copy-select query conv)
> (lambda (conn)
> (let ((tname (string-append "tmp_copy_select")))
> (let ((call-with-transaction
> (jdbc/call-with-transaction
> (lambda (conn)
> (let ((create-temp-table (string-append "CREATE TEMP TABLE " tname " AS " query))
> (drop-temp-table (string-append "DROP TABLE " tname)))
> (dynamic-wind
> (lambda ()
> (let ((create-temp-table/stmt (jdbc/prepare-statement conn create-temp-table #f)))
> (jdbc/execute-update create-temp-table/stmt)))
> (lambda ()
> (let* ((really-read-table (read-table tname conv))
> (rows (really-read-table conn)))
> rows))
> (lambda ()
> (let ((drop-temp-table/stmt (jdbc/prepare-statement conn drop-temp-table #f)))
> (jdbc/execute-update drop-temp-table/stmt)))))))))
> (call-with-transaction conn)))))
>
>
> --
> If we confess our sins, he is faithful and just to forgive us our sins
> and to cleanse us from all unrighteousness.
>
> 1 John 1:9 (ESV)
>
|