refdb-cvs Mailing List for RefDB (Page 70)
Status: Beta
Brought to you by:
mhoenicka
You can subscribe to this list here.
2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(47) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 |
Jan
(88) |
Feb
(50) |
Mar
(36) |
Apr
(9) |
May
(30) |
Jun
(30) |
Jul
(22) |
Aug
(16) |
Sep
(42) |
Oct
(18) |
Nov
(29) |
Dec
(23) |
2005 |
Jan
(18) |
Feb
(28) |
Mar
(21) |
Apr
(35) |
May
(62) |
Jun
(22) |
Jul
(5) |
Aug
(40) |
Sep
(98) |
Oct
(81) |
Nov
(51) |
Dec
(62) |
2006 |
Jan
(46) |
Feb
(36) |
Mar
(8) |
Apr
(16) |
May
(14) |
Jun
(16) |
Jul
(47) |
Aug
(60) |
Sep
(34) |
Oct
(16) |
Nov
(46) |
Dec
(11) |
2007 |
Jan
(16) |
Feb
(13) |
Mar
(58) |
Apr
(32) |
May
(4) |
Jun
(8) |
Jul
(31) |
Aug
(46) |
Sep
(22) |
Oct
(30) |
Nov
(58) |
Dec
(15) |
2008 |
Jan
(8) |
Feb
(8) |
Mar
(2) |
Apr
(6) |
May
(3) |
Jun
(2) |
Jul
(1) |
Aug
|
Sep
|
Oct
(6) |
Nov
(3) |
Dec
(5) |
2009 |
Jan
(1) |
Feb
(20) |
Mar
(8) |
Apr
(5) |
May
(8) |
Jun
(3) |
Jul
(6) |
Aug
(4) |
Sep
(7) |
Oct
(8) |
Nov
(2) |
Dec
(1) |
2010 |
Jan
(1) |
Feb
(4) |
Mar
|
Apr
|
May
(1) |
Jun
(1) |
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
(1) |
Dec
|
2011 |
Jan
(5) |
Feb
(5) |
Mar
(13) |
Apr
(3) |
May
|
Jun
|
Jul
(4) |
Aug
|
Sep
(2) |
Oct
|
Nov
|
Dec
|
2012 |
Jan
(1) |
Feb
(1) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(5) |
Nov
|
Dec
(3) |
2013 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(1) |
Dec
(13) |
2014 |
Jan
(2) |
Feb
(2) |
Mar
(2) |
Apr
(4) |
May
(1) |
Jun
(1) |
Jul
|
Aug
(6) |
Sep
(3) |
Oct
|
Nov
(2) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(3) |
Sep
|
Oct
(1) |
Nov
(1) |
Dec
|
2016 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(2) |
Dec
|
2017 |
Jan
|
Feb
|
Mar
(5) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2018 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
|
Dec
|
2019 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2021 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(1) |
Nov
|
Dec
|
2022 |
Jan
(9) |
Feb
(16) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Markus H. <mho...@us...> - 2004-02-15 23:12:35
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15568 Modified Files: refdbdref.c Log Message: do a manual rollback if addref() fails; new function delete_ref_by_id() Index: refdbdref.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdref.c,v retrieving revision 1.63 retrieving revision 1.64 diff -u -U2 -r1.63 -r1.64 --- refdbdref.c 7 Feb 2004 14:54:40 -0000 1.63 +++ refdbdref.c 15 Feb 2004 23:05:02 -0000 1.64 @@ -296,4 +296,5 @@ struct addrisx_data ardata; struct lilimem sentinel; + struct DELRESULT delresult; sentinel.ptr_mem = NULL; @@ -458,4 +459,16 @@ XML_ParserFree(p); + /* do something intelligent in the case of a parse or mem error */ + if (!ullresult || ardata.ndb_error || ardata.nmem_error) { + if (strcmp(my_dbi_conn_get_cap(ardata.conn, "transaction"), "t")) { + /* we have to delete the junk reference manually */ + delete_ref_by_id(ardata.n_refdb_id, conn, ptr_clrequest, &delresult); + } + else { + my_dbi_conn_rollback(ardata.conn); + } + my_dbi_conn_unlock(ardata.conn); + } + if (ardata.msgpool && *(ardata.msgpool)) { /* send messages to client */ @@ -992,20 +1005,9 @@ int deleteref(struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult) { dbi_conn conn; - dbi_result dbires; - dbi_result dbires1; - dbi_result dbires2; - dbi_result dbires_user; int i; int numbyte; /* number of bytes written */ int error; - int result; int retval = 1; - unsigned long long n_periodical_id; - unsigned long long n_id; - unsigned long long n_xid; - unsigned long long numrows; size_t n_bufsize; - char sql_command[640]; /* fixed length is ok here since only IDs are appended to the query strings, and the length of an ID is limited by the database */ - char periodical_id[32]; /* will hold the ID until used */ char* id_list; const char* drivername; @@ -1090,226 +1092,283 @@ ptr_delresult->skipped = 0; - /* The general procedure to remove a reference is as follows: 
Remove - the entry in the main table t_refdb. Query the author, keyword, user - crosslink tables for authors, keywords, users used by this reference. - Find out whether these authors, keywords, users are used by any other - datasets. If not, delete them from the authors, keywords, users data - tables. In both cases, delete the entries in the crosslink tables */ - ptr_curr = &id_sentinel; /* loop over arguments */ while ((ptr_curr = get_next_lilid(ptr_curr)) != NULL) { - /* If the db server supports it, start a transaction. We want one - transaction per reference */ - if (my_dbi_conn_begin(conn)) { - LOG_PRINT(LOG_WARNING, "begin transaction failed"); - tiwrite(ptr_clrequest->fd, "begin transaction failed\n", TERM_NO); + if (!delete_ref_by_id(ptr_curr->value, conn, ptr_clrequest, ptr_delresult)) { retval = 0; - goto Finish; + break; } + } /* end while */ + + Finish: + if (conn && ptr_delresult->success) { + update_meta(conn, ptr_clrequest); + } + + dbi_conn_close(conn); + delete_all_lilid(&id_sentinel); + delete_all_lilimem(&sentinel); - /* lock the tables we'll write to to prevent concurrent writes - from different clients */ - if (my_dbi_conn_lock(conn)) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "cannot lock tables", TERM_NO); - LOG_PRINT(LOG_WARNING, "Cannot lock tables"); - retval = 0; - goto Finish; - } + return retval; +} - sprintf(sql_command, "SELECT refdb_id FROM t_refdb WHERE refdb_type != \'DUMMY\' AND refdb_id="ULLSPEC, (unsigned long long)(ptr_curr->value)); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; 
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + delete_ref_by_id(): deletes a reference by its ID value + + int delete_ref_by_id returns 0 if failed, 1 if successful + + unsigned long long idval ID value of the note to be deleted + + dbi_conn conn database connection + + struct CLIENT_REQUEST* ptr_clrequest ptr to structure with client info + + struct DELRESULT* ptr_delresult structure to hold number of successful and + failed deleterefs + + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +int delete_ref_by_id(unsigned long long idval, dbi_conn conn, struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult) { + char sql_command[640]; /* fixed length is ok here since only IDs are appended to the query strings, and the length of an ID is limited by the database */ + char periodical_id[32]; /* will hold the ID until used */ + int result; + unsigned long long n_id; + unsigned long long n_xid; + unsigned long long n_periodical_id; + unsigned long long numrows; + dbi_result dbires; + dbi_result dbires1; + dbi_result dbires2; + dbi_result dbires_user; + + /* The general procedure to remove a reference is as follows: Remove + the entry in the main table t_refdb. Query the author, keyword, user + crosslink tables for authors, keywords, users used by this reference. + Find out whether these authors, keywords, users are used by any other + datasets. If not, delete them from the authors, keywords, users data + tables. In both cases, delete the entries in the crosslink tables */ + + /* If the db server supports it, start a transaction. 
We want one + transaction per reference */ + if (my_dbi_conn_begin(conn)) { + LOG_PRINT(LOG_WARNING, "begin transaction failed"); + tiwrite(ptr_clrequest->fd, "begin transaction failed\n", TERM_NO); + return 0; + } + + /* lock the tables we'll write to to prevent concurrent writes + from different clients */ + if (my_dbi_conn_lock(conn)) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "cannot lock tables", TERM_NO); + LOG_PRINT(LOG_WARNING, "Cannot lock tables"); + return 0; + } - if (dbi_result_get_numrows(dbires) == 0) { - /* ID does not exist - nothing to do */ - sprintf(sql_command, "ID "ULLSPEC" not found\n", (unsigned long long)(ptr_curr->value)); - tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); - dbi_result_free(dbires); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - ptr_delresult->skipped++; - continue; + sprintf(sql_command, "SELECT refdb_id FROM t_refdb WHERE refdb_type != \'DUMMY\' AND refdb_id="ULLSPEC, (unsigned long long)idval); + LOG_PRINT(LOG_DEBUG, sql_command); + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } + if (dbi_result_get_numrows(dbires) == 0) { + /* ID does not exist - nothing to do */ + sprintf(sql_command, "ID "ULLSPEC" not found\n", (unsigned long long)idval); + tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); dbi_result_free(dbires); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + ptr_delresult->skipped++; + return 1; + } + + dbi_result_free(dbires); - sprintf(sql_command, "SELECT user_id, xuser_id FROM t_xuser WHERE refdb_id="ULLSPEC, (unsigned long long)(ptr_curr->value)); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires_user = 
dbi_conn_query(conn, sql_command); - if (!dbires_user) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + sprintf(sql_command, "SELECT user_id, xuser_id FROM t_xuser WHERE refdb_id="ULLSPEC, (unsigned long long)idval); + LOG_PRINT(LOG_DEBUG, sql_command); + dbires_user = dbi_conn_query(conn, sql_command); + if (!dbires_user) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } - if (dbi_result_get_numrows(dbires_user) > 1) { - /* dataset is used by at least one other user, refuse to delete it */ - sprintf(sql_command, "ID "ULLSPEC" is still in use\n", (unsigned long long)(ptr_curr->value)); - tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - dbi_result_free(dbires_user); - ptr_delresult->skipped++; - continue; - } + if (dbi_result_get_numrows(dbires_user) > 1) { + /* dataset is used by at least one other user, refuse to delete it */ + sprintf(sql_command, "ID "ULLSPEC" is still in use\n", (unsigned long long)idval); + tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + dbi_result_free(dbires_user); + ptr_delresult->skipped++; + return 1; + } - /* retrieve periodical_id for later use */ - sprintf(sql_command, "SELECT refdb_periodical_id FROM t_refdb WHERE refdb_id="ULLSPEC, (unsigned long long)(ptr_curr->value)); - LOG_PRINT(LOG_DEBUG, sql_command); + /* retrieve periodical_id for later use */ + sprintf(sql_command, "SELECT refdb_periodical_id FROM t_refdb WHERE refdb_id="ULLSPEC, 
(unsigned long long)idval); + LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } - if (dbi_result_next_row(dbires) == 0) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - dbi_result_free(dbires_user); - dbi_result_free(dbires); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_INFO, "ID requested for delete not found"); - ptr_delresult->skipped++; - continue; /* try next ID, this one most likely does not exist */ + if (dbi_result_next_row(dbires) == 0) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + dbi_result_free(dbires_user); + dbi_result_free(dbires); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_INFO, "ID requested for delete not found"); + ptr_delresult->skipped++; + return 1; /* try next ID, this one most likely does not exist */ + } - if ((n_periodical_id = my_dbi_result_get_idval(dbires, "refdb_periodical_id")) != 0) { - sprintf(periodical_id, ULLSPEC, (unsigned long long)n_periodical_id); /* save ID */ + if ((n_periodical_id = 
my_dbi_result_get_idval(dbires, "refdb_periodical_id")) != 0) { + sprintf(periodical_id, ULLSPEC, (unsigned long long)n_periodical_id); /* save ID */ + } + else { + periodical_id[0] = '\0'; + } + + dbi_result_free(dbires); + + /* search orphans in t_keyword */ + result = remove_keyword_entries(idval, conn, 0); + + if (result) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } - else { - periodical_id[0] = '\0'; + if (result == 1) { + tiwrite(ptr_clrequest->fd, "select from t_xkeyword failed\n", TERM_NO); } + else if (result == 2) { + tiwrite(ptr_clrequest->fd, "delete from t_keyword failed\n", TERM_NO); + } + else if (result == 3) { + tiwrite(ptr_clrequest->fd, "delete from t_xkeyword failed\n", TERM_NO); + } + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + return 0; + } - dbi_result_free(dbires); - - /* search orphans in t_keyword */ - result = remove_keyword_entries(ptr_curr->value, conn, 0); + /* search orphans in t_author */ + result = remove_author_entries(idval, conn); - if (result) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - if (result == 1) { - tiwrite(ptr_clrequest->fd, "select from t_xkeyword failed\n", TERM_NO); - } - else if (result == 2) { - tiwrite(ptr_clrequest->fd, "delete from t_keyword failed\n", TERM_NO); - } - else if (result == 3) { - tiwrite(ptr_clrequest->fd, "delete from t_xkeyword failed\n", TERM_NO); - } - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; + if (result) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); + } + if (result == 1) { + tiwrite(ptr_clrequest->fd, "select from t_xauthor failed\n", TERM_NO); + } + else if (result == 2) { + tiwrite(ptr_clrequest->fd, "delete from t_author failed\n", TERM_NO); } + else if (result == 3) { + 
tiwrite(ptr_clrequest->fd, "delete from t_xauthor failed\n", TERM_NO); + } + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + return 0; + } - /* search orphans in t_author */ - result = remove_author_entries(ptr_curr->value, conn); - if (result) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - if (result == 1) { - tiwrite(ptr_clrequest->fd, "select from t_xauthor failed\n", TERM_NO); - } - else if (result == 2) { - tiwrite(ptr_clrequest->fd, "delete from t_author failed\n", TERM_NO); - } - else if (result == 3) { - tiwrite(ptr_clrequest->fd, "delete from t_xauthor failed\n", TERM_NO); - } - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; + /* remove orphans in t_xnote */ + result = remove_xnote_entries(idval, conn, 0 /*ref*/); + + if (result != 0 && result != 4) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); + } + if (result == 1) { + tiwrite(ptr_clrequest->fd, "select from t_xnote failed\n", TERM_NO); + } + else if (result == 3) { + tiwrite(ptr_clrequest->fd, "delete from t_xnote failed\n", TERM_NO); } + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + return 0; + } - /* remove orphans in t_xnote */ - result = remove_xnote_entries(ptr_curr->value, conn, 0 /*ref*/); + /* search orphans in t_periodical */ + if (n_periodical_id) { + result = remove_periodical_entries(n_periodical_id, conn); - if (result != 0 && result != 4) { + if (result) { if (ptr_clrequest->n_cgi) { iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } - if (result == 1) { - tiwrite(ptr_clrequest->fd, "select from t_xnote failed\n", TERM_NO); - } - else if (result == 3) { - tiwrite(ptr_clrequest->fd, "delete from t_xnote failed\n", TERM_NO); - } + tiwrite(ptr_clrequest->fd, "remove periodical failed\n", TERM_NO); 
dbi_result_free(dbires_user); my_dbi_conn_unlock(conn); my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; + return 0; } + } + /* "delete" entry in main table. The entry is kept in the table to + prevent sequence problems when reimporting a database dump. + refdb_citekey must be unique, so we use a string representation + of refdb_id */ + /* sprintf(sql_command, "UPDATE t_refdb SET refdb_type='DUMMY',refdb_citekey='"ULLSPEC"',refdb_pubyear=0,refdb_secyear=0,refdb_startpage=NULL,refdb_endpage=NULL,refdb_abstract=NULL,refdb_title=NULL,refdb_volume=NULL,refdb_issue=NULL,refdb_booktitle=NULL,refdb_city=NULL,refdb_publisher=NULL,refdb_title_series=NULL,refdb_address=NULL,refdb_url=NULL,refdb_issn=NULL,refdb_pyother_info=NULL,refdb_secother_info=NULL,refdb_periodical_id=0,refdb_user1=NULL,refdb_user2=NULL,refdb_user3=NULL,refdb_user4=NULL,refdb_user5=NULL,refdb_misc1=NULL,refdb_misc2=NULL,refdb_misc3=NULL,refdb_linkpdf=NULL,refdb_linkfull=NULL,refdb_linkrel=NULL,refdb_linkimg=NULL WHERE refdb_id="ULLSPEC, (unsigned long long)(idval), (unsigned long long)(idval)); */ - /* search orphans in t_periodical */ - if (n_periodical_id) { - result = remove_periodical_entries(n_periodical_id, conn); - - if (result) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "remove periodical failed\n", TERM_NO); - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; - } + sprintf(sql_command, "DELETE FROM t_refdb WHERE refdb_id="ULLSPEC, idval); + LOG_PRINT(LOG_DEBUG, sql_command); + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + 
return 0; + } + dbi_result_free(dbires); - /* "delete" entry in main table. The entry is kept in the table to - prevent sequence problems when reimporting a database dump. - refdb_citekey must be unique, so we use a string representation - of refdb_id */ - sprintf(sql_command, "UPDATE t_refdb SET refdb_type='DUMMY',refdb_citekey='"ULLSPEC"',refdb_pubyear=0,refdb_secyear=0,refdb_startpage=NULL,refdb_endpage=NULL,refdb_abstract=NULL,refdb_title=NULL,refdb_volume=NULL,refdb_issue=NULL,refdb_booktitle=NULL,refdb_city=NULL,refdb_publisher=NULL,refdb_title_series=NULL,refdb_address=NULL,refdb_url=NULL,refdb_issn=NULL,refdb_pyother_info=NULL,refdb_secother_info=NULL,refdb_periodical_id=0,refdb_user1=NULL,refdb_user2=NULL,refdb_user3=NULL,refdb_user4=NULL,refdb_user5=NULL,refdb_misc1=NULL,refdb_misc2=NULL,refdb_misc3=NULL,refdb_linkpdf=NULL,refdb_linkfull=NULL,refdb_linkrel=NULL,refdb_linkimg=NULL WHERE refdb_id="ULLSPEC, (unsigned long long)(ptr_curr->value), (unsigned long long)(ptr_curr->value)); - -/* sprintf(sql_command, "DELETE FROM t_refdb WHERE refdb_id="ULLSPEC, ptr_curr->value); */ + /* search orphans in t_user. The query returning dbires_user ran above */ + while (dbi_result_next_row(dbires_user)) { + n_id = my_dbi_result_get_idval(dbires_user, "user_id"); + n_xid = my_dbi_result_get_idval(dbires_user, "xuser_id"); + sprintf(sql_command, "SELECT xuser_id FROM t_xuser WHERE user_id="ULLSPEC, (unsigned long long)n_id); LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { + dbires1 = dbi_conn_query(conn, sql_command); + if (!dbires1) { if (ptr_clrequest->n_cgi) { iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); @@ -1320,54 +1379,12 @@ my_dbi_conn_rollback(conn); LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + return 0; } - dbi_result_free(dbires); - - /* search orphans in t_user. 
The query returning dbires_user ran above */ - while (dbi_result_next_row(dbires_user)) { - n_id = my_dbi_result_get_idval(dbires_user, "user_id"); - n_xid = my_dbi_result_get_idval(dbires_user, "xuser_id"); - sprintf(sql_command, "SELECT xuser_id FROM t_xuser WHERE user_id="ULLSPEC, (unsigned long long)n_id); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires1 = dbi_conn_query(conn, sql_command); - if (!dbires1) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; - } - numrows = dbi_result_get_numrows(dbires1); + numrows = dbi_result_get_numrows(dbires1); - if (numrows == 1) { /* if no other reference uses this user */ - /* delete entry in t_user table */ - sprintf(sql_command, "DELETE FROM t_user WHERE user_id="ULLSPEC, (unsigned long long)n_id); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires2 = dbi_conn_query(conn, sql_command); - if (!dbires2) { - if (ptr_clrequest->n_cgi) { - iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); - } - tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); - dbi_result_free(dbires1); - dbi_result_free(dbires_user); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; - } - dbi_result_free(dbires2); - } - - /* delete entry in xuser table */ - sprintf(sql_command, "DELETE FROM t_xuser WHERE xuser_id="ULLSPEC, (unsigned long long)n_xid); + if (numrows == 1) { /* if no other reference uses this user */ + /* delete entry in t_user table */ + sprintf(sql_command, "DELETE FROM t_user WHERE user_id="ULLSPEC, (unsigned long long)n_id); LOG_PRINT(LOG_DEBUG, sql_command); dbires2 = dbi_conn_query(conn, sql_command); @@ -1382,33 +1399,41 @@ my_dbi_conn_rollback(conn); 
LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + return 0; } dbi_result_free(dbires2); - dbi_result_free(dbires1); } - dbi_result_free(dbires_user); - ptr_delresult->success++; - - if (!ptr_clrequest->n_cgi) { - /* send message to client, reuse sql_command */ - sprintf(sql_command, "ID "ULLSPEC" successfully removed\n", (unsigned long long)(ptr_curr->value)); - tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); + /* delete entry in xuser table */ + sprintf(sql_command, "DELETE FROM t_xuser WHERE xuser_id="ULLSPEC, (unsigned long long)n_xid); + LOG_PRINT(LOG_DEBUG, sql_command); + dbires2 = dbi_conn_query(conn, sql_command); + if (!dbires2) { + if (ptr_clrequest->n_cgi) { + iwrite(ptr_clrequest->fd, cgihead_plain.text, cgihead_plain.length); + } + tiwrite(ptr_clrequest->fd, "deleteref failed\n", TERM_NO); + dbi_result_free(dbires1); + dbi_result_free(dbires_user); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; } - my_dbi_conn_unlock(conn); - my_dbi_conn_commit(conn); - } /* end for */ + dbi_result_free(dbires2); + dbi_result_free(dbires1); + } - Finish: - if (conn && ptr_delresult->success) { - update_meta(conn, ptr_clrequest); + dbi_result_free(dbires_user); + ptr_delresult->success++; + + if (!ptr_clrequest->n_cgi) { + /* send message to client, reuse sql_command */ + sprintf(sql_command, "ID "ULLSPEC" successfully removed\n", (unsigned long long)idval); + tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); } + my_dbi_conn_unlock(conn); + my_dbi_conn_commit(conn); - dbi_conn_close(conn); - delete_all_lilid(&id_sentinel); - delete_all_lilimem(&sentinel); - - return retval; + return 1; } |
From: Markus H. <mho...@us...> - 2004-02-15 23:10:14
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14811 Modified Files: refdbdnote.c Log Message: do a manual rollback if addnote() fails; new function delete_note_by_id() Index: refdbdnote.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdnote.c,v retrieving revision 1.23 retrieving revision 1.24 diff -u -U2 -r1.23 -r1.24 --- refdbdnote.c 4 Feb 2004 21:19:05 -0000 1.23 +++ refdbdnote.c 15 Feb 2004 23:02:42 -0000 1.24 @@ -87,7 +87,8 @@ const char *drivername; /* name of the libdbi driver */ char old_db[DBNAME_LENGTH+1]; + char sql_command[512]; struct addnotes_data andata; struct lilimem sentinel; - + struct DELRESULT delresult; sentinel.ptr_mem = NULL; sentinel.ptr_next = NULL; @@ -207,4 +208,16 @@ XML_ParserFree(p); + /* do something intelligent in the case of a parse or mem error */ + if (!ullresult || andata.ndb_error || andata.nmem_error) { + if (strcmp(my_dbi_conn_get_cap(andata.conn, "transaction"), "t")) { + /* we have to delete the junk reference manually */ + delete_note_by_id(andata.n_note_id, conn, ptr_clrequest, &delresult); + } + else { + my_dbi_conn_rollback(andata.conn); + } + my_dbi_conn_unlock(andata.conn); + } + if (andata.msgpool && *(andata.msgpool)) { /* send messages to client */ @@ -263,22 +276,13 @@ int deletenote(struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult) { dbi_conn conn; - dbi_result dbires; - dbi_result dbires1; - dbi_result dbires2; int i; int numbyte; /* number of bytes written */ int error; - int result; int retval = 1; unsigned long long n_periodical_id; unsigned long long n_id; unsigned long long n_xid; - unsigned long long n_user_id = 0; - unsigned long long numrows; size_t n_bufsize; - char sql_command[640]; /* fixed length is ok here since only IDs are appended to the query strings, and the length of an ID is limited by the database */ - char periodical_id[32]; /* will hold the ID until used */ char* 
id_list; - const char* owner; const char* drivername; struct lilimem sentinel; @@ -350,4 +354,52 @@ ptr_delresult->skipped = 0; + ptr_curr = &id_sentinel; + + /* loop over arguments */ + while ((ptr_curr = get_next_lilid(ptr_curr)) != NULL) { + if (!delete_note_by_id(ptr_curr->value, conn, ptr_clrequest, ptr_delresult)) { + retval = 0; + break; + } + } /* end while */ + + if (conn && ptr_delresult->success) { + update_meta(conn, ptr_clrequest); + } + + dbi_conn_close(conn); + delete_all_lilid(&id_sentinel); + delete_all_lilimem(&sentinel); + + return retval; +} + + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + delete_note_by_id(): deletes a note by its ID value + + int delete_note_by_id returns 0 if failed, 1 if successful + + unsigned long long idval ID value of the note to be deleted + + dbi_conn conn database connection + + struct CLIENT_REQUEST* ptr_clrequest ptr to structure with client info + + struct DELRESULT* ptr_delresult structure to hold number of successful and + failed deleterefs + + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +int delete_note_by_id(unsigned long long idval, dbi_conn conn, struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult) { + char sql_command[640]; /* fixed length is ok here since only IDs are appended to the query strings, and the length of an ID is limited by the database */ + char periodical_id[32]; /* will hold the ID until used */ + const char* owner; + int result; + unsigned long long n_user_id = 0; + unsigned long long numrows; + dbi_result dbires; + dbi_result dbires1; + dbi_result dbires2; + /* The general procedure to remove a note is as follows: Remove the entry in the main table t_note. Query the keyword, user @@ -357,42 +409,75 @@ tables. 
In all cases, delete the entries in the crosslink tables */ - ptr_curr = &id_sentinel; - /* loop over arguments */ - while ((ptr_curr = get_next_lilid(ptr_curr)) != NULL) { - /* If the db server supports it, start a transaction. We want one - transaction per reference */ - if (my_dbi_conn_begin(conn)) { - LOG_PRINT(LOG_WARNING, "begin transaction failed"); - tiwrite(ptr_clrequest->fd, "begin transaction failed\n", TERM_NO); - retval = 0; - goto Finish; - } + /* If the db server supports it, start a transaction. We want one + transaction per reference */ + if (my_dbi_conn_begin(conn)) { + LOG_PRINT(LOG_WARNING, "begin transaction failed"); + tiwrite(ptr_clrequest->fd, "begin transaction failed\n", TERM_NO); + return 0; + } - /* lock the tables we'll write to to prevent concurrent writes - from different clients */ - if (my_dbi_conn_lock(conn)) { - tiwrite(ptr_clrequest->fd, "cannot lock tables", TERM_NO); - LOG_PRINT(LOG_WARNING, "Cannot lock tables"); - retval = 0; - goto Finish; - } + /* lock the tables we'll write to to prevent concurrent writes + from different clients */ + if (my_dbi_conn_lock(conn)) { + tiwrite(ptr_clrequest->fd, "cannot lock tables", TERM_NO); + LOG_PRINT(LOG_WARNING, "Cannot lock tables"); + return 0; + } - sprintf(sql_command, "SELECT note_id FROM t_note WHERE note_id="ULLSPEC, ptr_curr->value); + sprintf(sql_command, "SELECT note_id FROM t_note WHERE note_id="ULLSPEC, idval); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { - tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + LOG_PRINT(LOG_DEBUG, sql_command); + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } + + if 
(dbi_result_get_numrows(dbires) == 0) { + ptr_delresult->skipped++; + dbi_result_free(dbires); + my_dbi_conn_unlock(conn); + my_dbi_conn_commit(conn); + + if (!ptr_clrequest->n_cgi) { + /* send message to client, reuse sql_command */ + sprintf(sql_command, "NID "ULLSPEC" not found\n", (unsigned long long)idval); + tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); } + return 1; + } + + dbi_result_free(dbires); + + /* get user id and name */ + owner = NULL; - if (dbi_result_get_numrows(dbires) == 0) { + sprintf(sql_command, "SELECT note_user_id, user_name FROM t_note,t_user WHERE t_note.note_user_id=t_user.user_id AND note_id="ULLSPEC, idval); + + LOG_PRINT(LOG_DEBUG, sql_command); + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } + else if (dbi_result_next_row(dbires) != 0){ + n_user_id = my_dbi_result_get_idval_idx(dbires, 1); + owner = dbi_result_get_string_idx(dbires, 2); + } + else { + n_user_id = 0; + } + + if (owner && *owner) { + if (strcmp(owner, ptr_clrequest->username)) { + /* only owner can delete this note */ ptr_delresult->skipped++; - dbi_result_free(dbires); my_dbi_conn_unlock(conn); my_dbi_conn_commit(conn); @@ -400,91 +485,63 @@ if (!ptr_clrequest->n_cgi) { /* send message to client, reuse sql_command */ - sprintf(sql_command, "NID "ULLSPEC" not found\n", (unsigned long long)(ptr_curr->value)); + sprintf(sql_command, "Only the owner %s can remove ID "ULLSPEC"\n", owner, (unsigned long long)idval); tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); } - continue; + return 1; } + } - dbi_result_free(dbires); - - /* get user id and name */ - owner = NULL; + dbi_result_free(dbires); - sprintf(sql_command, "SELECT note_user_id, user_name FROM t_note,t_user WHERE t_note.note_user_id=t_user.user_id AND note_id="ULLSPEC, ptr_curr->value); + /* search orphans in t_keyword 
*/ + result = remove_keyword_entries(idval, conn, 1); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { - tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + if (result) { + if (result == 1) { + tiwrite(ptr_clrequest->fd, "select from t_xkeyword failed\n", TERM_NO); } - else if (dbi_result_next_row(dbires) != 0){ - n_user_id = my_dbi_result_get_idval_idx(dbires, 1); - owner = dbi_result_get_string_idx(dbires, 2); + else if (result == 2) { + tiwrite(ptr_clrequest->fd, "delete from t_keyword failed\n", TERM_NO); } - else { - n_user_id = 0; - } - - if (owner && *owner) { - if (strcmp(owner, ptr_clrequest->username)) { - /* only owner can delete this note */ - ptr_delresult->skipped++; - my_dbi_conn_unlock(conn); - my_dbi_conn_commit(conn); - - if (!ptr_clrequest->n_cgi) { - /* send message to client, reuse sql_command */ - sprintf(sql_command, "Only the owner %s can remove ID "ULLSPEC"\n", owner, (unsigned long long)(ptr_curr->value)); - tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); - } - continue; - } + else if (result == 3) { + tiwrite(ptr_clrequest->fd, "delete from t_xkeyword failed\n", TERM_NO); } + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + return 0; + } - dbi_result_free(dbires); - /* search orphans in t_keyword */ - result = remove_keyword_entries(ptr_curr->value, conn, 1); + /* search orphans in t_xnote */ + result = remove_xnote_entries(idval, conn, 4); - if (result) { - if (result == 1) { - tiwrite(ptr_clrequest->fd, "select from t_xkeyword failed\n", TERM_NO); - } - else if (result == 2) { - tiwrite(ptr_clrequest->fd, "delete from t_keyword failed\n", TERM_NO); - } - else if (result == 3) { - tiwrite(ptr_clrequest->fd, "delete from t_xkeyword failed\n", TERM_NO); - } - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; 
+ if (result == 1 || result == 3) { + if (result == 1) { + tiwrite(ptr_clrequest->fd, "select from t_xnote failed\n", TERM_NO); } - - - /* search orphans in t_xnote */ - result = remove_xnote_entries(ptr_curr->value, conn, 4); - - if (result) { - if (result == 1) { - tiwrite(ptr_clrequest->fd, "select from t_xnote failed\n", TERM_NO); - } - else if (result == 3) { - tiwrite(ptr_clrequest->fd, "delete from t_xnote failed\n", TERM_NO); - } - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - retval = 0; - goto Finish; + else if (result == 3) { + tiwrite(ptr_clrequest->fd, "delete from t_xnote failed\n", TERM_NO); } + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + return 0; + } + + /* delete entry in main table */ + sprintf(sql_command, "DELETE FROM t_note WHERE note_id="ULLSPEC, idval); + LOG_PRINT(LOG_DEBUG, sql_command); + dbires = dbi_conn_query(conn, sql_command); + if (!dbires) { + tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); + my_dbi_conn_unlock(conn); + my_dbi_conn_rollback(conn); + LOG_PRINT(LOG_WARNING, "query error"); + return 0; + } + dbi_result_free(dbires); - /* delete entry in main table */ - sprintf(sql_command, "DELETE FROM t_note WHERE note_id="ULLSPEC, ptr_curr->value); + /* search orphans in t_user */ + if (n_user_id) { + sprintf(sql_command, "SELECT xuser_id FROM t_xuser WHERE user_id="ULLSPEC, (unsigned long long)n_user_id); LOG_PRINT(LOG_DEBUG, sql_command); dbires = dbi_conn_query(conn, sql_command); @@ -494,67 +551,40 @@ my_dbi_conn_rollback(conn); LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; + return 0; } - dbi_result_free(dbires); - /* search orphans in t_user */ - if (n_user_id) { - sprintf(sql_command, "SELECT xuser_id FROM t_xuser WHERE user_id="ULLSPEC, (unsigned long long)n_user_id); + numrows = dbi_result_get_numrows(dbires); + + if (!numrows) { + /* no other note or reference belong to this user */ + /* delete entry in t_user table */ + sprintf(sql_command, "DELETE FROM t_user 
WHERE user_id="ULLSPEC, (unsigned long long)n_user_id); LOG_PRINT(LOG_DEBUG, sql_command); - dbires = dbi_conn_query(conn, sql_command); - if (!dbires) { + dbires1 = dbi_conn_query(conn, sql_command); + if (!dbires1) { tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); + dbi_result_free(dbires); my_dbi_conn_unlock(conn); my_dbi_conn_rollback(conn); LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; - } - - numrows = dbi_result_get_numrows(dbires); - - if (!numrows) { - /* no other note or reference belong to this user */ - /* delete entry in t_user table */ - sprintf(sql_command, "DELETE FROM t_user WHERE user_id="ULLSPEC, (unsigned long long)n_user_id); - LOG_PRINT(LOG_DEBUG, sql_command); - dbires1 = dbi_conn_query(conn, sql_command); - if (!dbires1) { - tiwrite(ptr_clrequest->fd, "deletenote failed\n", TERM_NO); - dbi_result_free(dbires); - my_dbi_conn_unlock(conn); - my_dbi_conn_rollback(conn); - LOG_PRINT(LOG_WARNING, "query error"); - retval = 0; - goto Finish; - } - dbi_result_free(dbires1); + return 0; } + dbi_result_free(dbires1); + } - dbi_result_free(dbires); - } /* end if n_user_id */ + dbi_result_free(dbires); + } /* end if n_user_id */ - ptr_delresult->success++; + ptr_delresult->success++; - if (!ptr_clrequest->n_cgi) { - /* send message to client, reuse sql_command */ - sprintf(sql_command, "ID "ULLSPEC" successfully removed\n", (unsigned long long)(ptr_curr->value)); - tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); - } - my_dbi_conn_unlock(conn); - my_dbi_conn_commit(conn); - } /* end while */ - - Finish: - if (conn && ptr_delresult->success) { - update_meta(conn, ptr_clrequest); + if (!ptr_clrequest->n_cgi) { + /* send message to client, reuse sql_command */ + sprintf(sql_command, "ID "ULLSPEC" successfully removed\n", (unsigned long long)idval); + tiwrite(ptr_clrequest->fd, sql_command, TERM_NO); } + my_dbi_conn_unlock(conn); + my_dbi_conn_commit(conn); - dbi_conn_close(conn); - delete_all_lilid(&id_sentinel); - 
delete_all_lilimem(&sentinel); - - return retval; + return 1; } |
From: Markus H. <mho...@us...> - 2004-02-15 23:07:34
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14023 Modified Files: refdbdbib.c Log Message: read_xml_data(): fixed communication protocol Index: refdbdbib.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdbib.c,v retrieving revision 1.35 retrieving revision 1.36 diff -u -U2 -r1.35 -r1.36 --- refdbdbib.c 4 Feb 2004 21:19:04 -0000 1.35 +++ refdbdbib.c 15 Feb 2004 23:00:02 -0000 1.36 @@ -2148,5 +2148,5 @@ LOG_PRINT(LOG_WARNING, "XML parse error"); sprintf(return_msg, "Parse error at line %d:\n%s\n", XML_GetCurrentLineNumber(p), XML_ErrorString(XML_GetErrorCode(p))); - tiwrite(fd, return_msg, TERM_NO); + tiwrite(fd, return_msg, TERM_YES); ptr_addresult->failure++; free(style_set); |
From: Markus H. <mho...@us...> - 2004-02-15 23:06:23
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13865 Modified Files: refdb-client.c Log Message: send_xml_data(): fixed communication protocol Index: refdb-client.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdb-client.c,v retrieving revision 1.30 retrieving revision 1.31 diff -u -U2 -r1.30 -r1.31 --- refdb-client.c 11 Feb 2004 23:42:05 -0000 1.30 +++ refdb-client.c 15 Feb 2004 22:58:50 -0000 1.31 @@ -645,4 +645,5 @@ /* printf("phase4 server reply:%s<<\n", inbuffer); */ if (numbyte == -1 || strcmp(inbuffer, "POS") != 0) { + tiwrite(n_sockfd, "QUIT", TERM_YES); if (numbyte == -1) { fprintf(errstream, "could not read from refdbd. Stop\n"); |
From: Markus H. <mho...@us...> - 2004-02-15 23:05:47
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13739 Modified Files: noteshandler.c Log Message: use unique temporary citation key to avoid problems with concurrent inserts Index: noteshandler.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/noteshandler.c,v retrieving revision 1.15 retrieving revision 1.16 diff -u -U2 -r1.15 -r1.16 --- noteshandler.c 10 Feb 2004 23:49:49 -0000 1.15 +++ noteshandler.c 15 Feb 2004 22:58:15 -0000 1.16 @@ -368,5 +368,6 @@ /* generate temporary key if necessary */ if (!*(ptr_andata->real_key)) { - sprintf(ptr_andata->real_key, ULLSPEC, (unsigned long long)0); + /* use pid to avoid problems with concurrent inserts */ + sprintf(ptr_andata->real_key, "dummy%d", getpid()); } @@ -933,5 +934,5 @@ /* insert new citekey if required */ - if (!strcmp(ptr_andata->real_key, "0") && ptr_andata->create_new) { + if (!strncmp(ptr_andata->real_key, "dummy", 5) && ptr_andata->create_new) { key = get_unique_citekey(ptr_andata->conn, ptr_andata->user, atoi(ptr_andata->year), ptr_andata->n_note_id, 1 /* notes */); @@ -974,5 +975,11 @@ if (strcmp(my_dbi_conn_get_cap(ptr_andata->conn, "transaction"), "t")) { /* we have to delete the junk reference manually */ - sprintf(sql_command, "DELETE FROM t_note WHERE note_key=\'DUMMY\'"); + if (*(ptr_andata->real_key)) { + sprintf(sql_command, "DELETE FROM t_note WHERE note_key=\'%s\'", ptr_andata->real_key); + } + else { + sprintf(sql_command, "DELETE FROM t_note WHERE note_key=\'dummy%d\'", getpid()); + } + LOG_PRINT(LOG_DEBUG, sql_command); dbires = dbi_conn_query(ptr_andata->conn, sql_command); |
From: Markus H. <mho...@us...> - 2004-02-15 23:04:02
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13045 Modified Files: refdbd.h.in Log Message: new functions delete_ref_by_id() and delete_note_by_id() Index: refdbd.h.in =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbd.h.in,v retrieving revision 1.5 retrieving revision 1.6 diff -u -U2 -r1.5 -r1.6 --- refdbd.h.in 17 Jan 2004 23:04:23 -0000 1.5 +++ refdbd.h.in 15 Feb 2004 22:56:30 -0000 1.6 @@ -108,4 +108,5 @@ int addref(struct CLIENT_REQUEST* ptr_clrequest, char* set_owner, struct ADDRESULT* ptr_addresult, int replace_ref, int n_keep_id, const char* reftype, Lilid* ptr_sentinel, char* input_encoding); int deleteref(struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult); +int delete_ref_by_id(unsigned long long idval, dbi_conn conn, struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult); unsigned long long getref(struct CLIENT_REQUEST* ptr_clrequest, struct bibinfo* ptr_biblio_info, int ref_format, int n_privatelist, char *pdfroot, char *cgi_url); int pickref(struct CLIENT_REQUEST* ptr_clrequest, int n_remove, struct DELRESULT* ptr_delresult); @@ -124,4 +125,5 @@ int addnote(struct CLIENT_REQUEST* ptr_clrequest, char* set_owner, struct ADDRESULT* ptr_addresult, int replace_note, Lilid* ptr_sentinel, char* input_encoding); int deletenote(struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult); +int delete_note_by_id(unsigned long long idval, dbi_conn conn, struct CLIENT_REQUEST* ptr_clrequest, struct DELRESULT* ptr_delresult); unsigned long long getnote(struct CLIENT_REQUEST* ptr_clrequest, struct bibinfo *ptr_biblio_info, int ref_format, int n_privatelist, char *cgi_url); int addlink(struct CLIENT_REQUEST* ptr_clrequest, char* set_owner, struct ADDRESULT* ptr_addresult, int n_remove); |
From: Markus H. <mho...@us...> - 2004-02-11 23:51:51
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv696 Modified Files: refdba.c refdbc.c refdbib.c Log Message: new send_xml_data(), send_ris_data() argument list Index: refdba.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdba.c,v retrieving revision 1.43 retrieving revision 1.44 diff -u -U2 -r1.43 -r1.44 --- refdba.c 31 Jan 2004 14:50:22 -0000 1.43 +++ refdba.c 11 Feb 2004 23:47:23 -0000 1.44 @@ -1801,5 +1801,5 @@ /* runs phases 1 through 5 of transfer protocol */ - n_xml_result = send_xml_data(infp, pagerfp, stderr, n_sockfd); + n_xml_result = send_xml_data(infp, pagerfp, stderr, n_sockfd, &byte_written); fclose(infp); Index: refdbc.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbc.c,v retrieving revision 1.68 retrieving revision 1.69 diff -u -U2 -r1.68 -r1.69 --- refdbc.c 11 Feb 2004 22:19:11 -0000 1.68 +++ refdbc.c 11 Feb 2004 23:47:23 -0000 1.69 @@ -3226,8 +3226,8 @@ /* runs phases 1 through 4 of communication protocol */ if (!strcmp(my_reftype, "ris")) { - n_send_result = send_ris_data(infp, pagerfp, errstream, n_sockfd, &ris_set_buffer, &n_setlength, &n_setcount, default_ris, n_cgi); + n_send_result = send_ris_data(infp, pagerfp, errstream, n_sockfd, &ris_set_buffer, &n_setlength, &n_setcount, default_ris, n_cgi, &byte_written); } else { - n_send_result = send_xml_data(infp, pagerfp, errstream, n_sockfd); + n_send_result = send_xml_data(infp, pagerfp, errstream, n_sockfd, &byte_written); } @@ -3309,5 +3309,5 @@ } else { - fwrite(inbuffer, sizeof(char), strlen(inbuffer), pagerfp); + byte_written += fwrite(inbuffer, sizeof(char), strlen(inbuffer), pagerfp); if (!n_cgi && !strcmp(my_reftype, "ris")) { sprintf(inbuffer, "%d dataset(s) sent.\n", n_setcount); @@ -3897,5 +3897,5 @@ /* runs phases 1 through 4 of communication protocol */ - n_send_result = send_xml_data(infp, pagerfp, errstream, n_sockfd); + 
n_send_result = send_xml_data(infp, pagerfp, errstream, n_sockfd, &byte_written); if (n_send_result) { @@ -3971,5 +3971,5 @@ } else { - fwrite(inbuffer, sizeof(char), strlen(inbuffer), pagerfp); + byte_written += fwrite(inbuffer, sizeof(char), strlen(inbuffer), pagerfp); } Index: refdbib.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbib.c,v retrieving revision 1.40 retrieving revision 1.41 diff -u -U2 -r1.40 -r1.41 --- refdbib.c 31 Jan 2004 14:50:22 -0000 1.40 +++ refdbib.c 11 Feb 2004 23:47:23 -0000 1.41 @@ -81,5 +81,5 @@ {"refdblib", ""}, {"startnumber", ""}, - {"encoding", ""}, + {"toencoding", ""}, {"", ""} }; @@ -783,5 +783,5 @@ /* run phases 1 through 4 of client/server protocol */ - n_result = send_xml_data(fp_infile, pagerfp, stderr, n_sockfd); + n_result = send_xml_data(fp_infile, pagerfp, stderr, n_sockfd, &byte_written); if (n_result) { |
From: Markus H. <mho...@us...> - 2004-02-11 23:46:33
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32265 Modified Files: refdb-client.c refdb-client.h Log Message: added variable to the argument list to pass the number of bytes written to pagerfp Index: refdb-client.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdb-client.c,v retrieving revision 1.29 retrieving revision 1.30 diff -u -U2 -r1.29 -r1.30 --- refdb-client.c 11 Feb 2004 22:21:14 -0000 1.29 +++ refdb-client.c 11 Feb 2004 23:42:05 -0000 1.30 @@ -535,6 +535,9 @@ int n_sockfd file descriptor of client/server socket connection + size_t* ptr_byte_written ptr to var that will be incremented by the number + of bytes written to a stream + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -int send_xml_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd) { +int send_xml_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, size_t* ptr_byte_written) { int n_style_file_done = 0; int numbyte; @@ -544,9 +547,9 @@ size_t n_result; size_t n_chunksize; - size_t byte_written = 0; char* buffer; char thebytes[11]; char inbuffer[COMMAND_INBUF_LEN]; + n_chunksize = XMLPARSE_CHUNKSIZE; buffer = (char*)malloc(n_chunksize); @@ -662,5 +665,5 @@ we do not want to write the terminating \0 */ if (!n_broken_pipe && strcmp(inbuffer, "POS")) { - byte_written += fwrite(inbuffer, sizeof(char), numbyte-num_trailz, pagerfp); + *ptr_byte_written += fwrite(inbuffer, sizeof(char), numbyte-num_trailz, pagerfp); } /* printf("%s", inbuffer); */ @@ -707,6 +710,9 @@ int n_cgi if 1, assume CGI data + size_t* ptr_byte_written ptr to var that will be incremented by the number + of bytes written to a stream + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -int send_ris_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, char** ptr_ris_set_buffer, size_t* ptr_n_setlength, int* ptr_n_setcount, char* default_ris, int n_cgi) { +int 
send_ris_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, char** ptr_ris_set_buffer, size_t* ptr_n_setlength, int* ptr_n_setcount, char* default_ris, int n_cgi, size_t* ptr_byte_written) { int n_result; int n_read_done = 0; @@ -714,8 +720,8 @@ int numbyte; int num_trailz; - size_t byte_written = 0; char thebytes[11] = ""; char inbuffer[COMMAND_INBUF_LEN]; + do { /* ------------------------------------------------------------ */ @@ -805,5 +811,5 @@ if (!n_broken_pipe) { if (!n_cgi) { - byte_written += fwrite(inbuffer, sizeof(char), numbyte-num_trailz, pagerfp); + *ptr_byte_written += fwrite(inbuffer, sizeof(char), numbyte-num_trailz, pagerfp); } } Index: refdb-client.h =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdb-client.h,v retrieving revision 1.14 retrieving revision 1.15 diff -u -U2 -r1.14 -r1.15 --- refdb-client.h 31 Jan 2004 14:56:53 -0000 1.14 +++ refdb-client.h 11 Feb 2004 23:42:05 -0000 1.15 @@ -37,6 +37,6 @@ int listvalue (char* arg, char* command, char* help_string, char* err_string); int getsimplelist(struct simplelistvals* slvals, int n_with_summary); -int send_xml_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd); -int send_ris_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, char** ptr_ris_set_buffer, size_t* ptr_n_setlength, int* ptr_n_setcount, char* default_ris, int n_cgi); +int send_xml_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, size_t* ptr_byte_written); +int send_ris_data(FILE *infp, FILE *pagerfp, FILE *errstream, int n_sockfd, char** ptr_ris_set_buffer, size_t* ptr_n_setlength, int* ptr_n_setcount, char* default_ris, int n_cgi, size_t* ptr_byte_written); void pipehandler(int sig); void inthandler(int sig); |
From: Markus H. <mho...@us...> - 2004-02-11 22:25:39
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14649 Modified Files: refdb-client.c Log Message: ask_for_passwd(): terminate password string even if fgets fails to do so Index: refdb-client.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdb-client.c,v retrieving revision 1.28 retrieving revision 1.29 diff -u -U2 -r1.28 -r1.29 --- refdb-client.c 4 Feb 2004 21:01:13 -0000 1.28 +++ refdb-client.c 11 Feb 2004 22:21:14 -0000 1.29 @@ -138,4 +138,7 @@ fprintf(stderr, "\n"); + /* be paranoid */ + passwd[PASSWD_LENGTH] = '\0'; + /* remove newline if necessary */ pw_len = strlen(passwd)-1; |
From: Markus H. <mho...@us...> - 2004-02-11 22:23:37
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13972 Modified Files: refdbc.c Log Message: fixed buffer size mismatches in addref() and addnote() Index: refdbc.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbc.c,v retrieving revision 1.67 retrieving revision 1.68 diff -u -U2 -r1.67 -r1.68 --- refdbc.c 4 Feb 2004 21:04:09 -0000 1.67 +++ refdbc.c 11 Feb 2004 22:19:11 -0000 1.68 @@ -3508,5 +3508,5 @@ /* the following is a temporary hack to allow cmdln_tokenize to work */ - newarg = (char*)malloc((size_t)(strlen(arg)+8)); + newarg = (char*)malloc((size_t)(strlen(arg)+9)); if (newarg == NULL) { delete_all_lilimem(&sentinel); @@ -4400,5 +4400,5 @@ /* the following is a temporary hack to allow cmdln_tokenize to work */ - newarg = (char*)malloc((size_t)(strlen(arg)+8)); + newarg = (char*)malloc((size_t)(strlen(arg)+9)); if (newarg == NULL) { delete_all_lilimem(&sentinel); |
From: Markus H. <mho...@us...> - 2004-02-11 20:44:58
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21450 Modified Files: risdb.c Log Message: fixed nasty typo that screwed up dealing with journal custom abbrev 2 Index: risdb.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/risdb.c,v retrieving revision 1.39 retrieving revision 1.40 diff -u -U2 -r1.39 -r1.40 --- risdb.c 29 Jan 2004 01:42:48 -0000 1.39 +++ risdb.c 11 Feb 2004 20:40:36 -0000 1.40 @@ -1766,5 +1766,5 @@ dbires1 = dbi_conn_query(conn, sql_command); - if (dbires1) { + if (!dbires1) { dbi_result_free(dbires); LOG_PRINT(LOG_WARNING, "update J2 failed"); |
From: Markus H. <mho...@us...> - 2004-02-10 23:55:12
|
Update of /cvsroot/refdb/tutorial In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29637 Modified Files: refdbtutorial.sgml Log Message: updated for 0.9.4 Index: refdbtutorial.sgml =================================================================== RCS file: /cvsroot/refdb/tutorial/refdbtutorial.sgml,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -U2 -r1.1.1.1 -r1.2 --- refdbtutorial.sgml 9 Feb 2004 20:52:24 -0000 1.1.1.1 +++ refdbtutorial.sgml 10 Feb 2004 23:51:31 -0000 1.2 @@ -16,4 +16,8 @@ <revhistory> <revision> + <revnumber>1.1</revnumber> + <date>2004-02-08</date> + </revision> + <revision> <revnumber>1.0</revnumber> <date>2002-11-22</date> @@ -49,12 +53,14 @@ defaultdb refs pager less +toencoding ISO-8859-1 +fromencoding ISO-8859-1 </programlisting> <para>The line starting with the hash sign (#) is a <emphasis>comment</emphasis>. You can add more comment lines if you want to, or leave out the comment line in the example if you remember what that file is good for anyway.</para> <para>If you (or your administrator) don't feel comfortable with storing your password in a plain-text file, you can use an asterisk "*" instead. This will cause the &appname; clients to ask for the password interactively at startup.</para> - <para>Now what does the configuration file <filename moreinfo="none">.&appname;ibrc</filename> look like? Incidentally it is the same, because we don't use any advanced features of these programs yet. You can simply create a copy and name it <filename moreinfo="none">.&appname;ibrc</filename>.</para> + <para>Now what does the configuration file <filename moreinfo="none">.&appname;ibrc</filename> look like? It is the same with the exception of the last line. 
You can simply create a copy and name it <filename moreinfo="none">.&appname;ibrc</filename>, remove the last line, and you're done.</para> </sect1> </chapter> <chapter> - <title>Managing references</title> + <title>Managing references and notes</title> <para>Now everything is ready to do some real work. We'll go ahead and learn how to use the reference management client.</para> <sect1> @@ -142,11 +148,11 @@ <sect1 id="sect-adding-refs"> <title>Adding references</title> - <para>If you're new to &appname; and don't have a database yet, you'll want to start by adding a couple of references. This chapter first teaches you how to add references in the main input format RIS. The following sections cover the import of data from various data sources like PubMed, BibTeX, or Z39.50 servers.</para> + <para>If you're new to &appname; and don't have a database yet, you'll want to start by adding a couple of references. This chapter first teaches you how to add references in the main input formats RIS and risx. The subsequent sections cover the import of data from various data sources like PubMed, BibTeX, or Z39.50 servers.</para> <tip> <para>If you're really new to &appname; but have access to an existing database, e.g. the one your department built, you might want to get acquainted with &appname; by <link linkend="sect-finding-refs">retrieving existing references</link>. Retrieving references does not alter the database, so this is safe to play around. Once you feel comfortable, return to this section.</para> </tip> <sect2> - <title>Adding RIS datasets</title> - <para>The RIS format is a plain-text tagged file format used by most Windows- and Mac-based reference management tools. This is currently the only reference data format that &appname; understands natively. 
All other data formats supported by &appname; must be converted to RIS using the conversion tools described in the following sections.</para> + <title>How to create RIS datasets</title> + <para>The RIS format is a plain-text tagged file format used by most Windows- and Mac-based reference management tools. A variety of other data formats supported by &appname; can be converted to RIS using the conversion tools described in the following sections.</para> <sect3> <title>What a RIS dataset looks like</title> @@ -586,138 +592,88 @@ </variablelist> </sect3> - <sect3 id="sect-global-personal-fields"> - <title>Global and personal fields</title> - <para>&appname; differs from other reference management tools because a main goal of its design is to encourage people to share their references. However, you may have figured from the tag list above that some of these entries only make sense if they can be maintained by each user individually. This is precisely the approach used by &appname;: The "hard" bibliographic data are global and identical for each user. The "soft" personal data, which are the only ones likely to change after the reference was added anyway, are maintained for each user individually. These personal fields are:</para> - <itemizedlist> - <listitem> - <para>The reprint status (RP)</para> - </listitem> - <listitem> - <para>The availability field (AV)</para> - </listitem> - <listitem> - <para>The notes field (N1)</para> - </listitem> - </itemizedlist> - <para>Even if you use one of the import filters described below or if you use RIS files exported from other bibliographic software, you should take the time to fill these fields with useful values. If you don't specify values, the AV and N1 field will be blank (this is ok), and the RP field will have the default value "NOT IN FILE".</para> - </sect3> <sect3> - <title>Adding vs. updating references</title> - <para>In most cases you have a new set of references and want to add them to your database. 
This is no big deal if the data comply with the RIS specification outlined above. Assuming your references are stored in a file <filename moreinfo="none">newrefs.ris</filename> in the present working directory, all you need to do is:</para> - <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref newrefs.ris</userinput></screen> - <para>To simply add the references in the example RIS file, use this command:</para> - <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref /usr/local/share/&appname;/examples/testrefs.ris</userinput></screen> - <para>&appname;c will try to add the references stored in that file to your default database. The diagnostic messages will be displayed in your pager, so if you send a dozen or so references it might take a few seconds until the results are displayed. You'll see a message for each reference found in the input file: Which ID was assigned, which citation key was used (or generated if you didn't specify one), and whether the operation was successful. Adding references usually fails only for two reasons:</para> - <itemizedlist> - <listitem> - <para>The input data didn't contain useful RIS datasets.</para> - </listitem> - <listitem> - <para>A citation key was specified in the reference but the same citation key already exists in the database. &appname; will refuse to accept the reference because citation keys <emphasis>must</emphasis> be unique. You can fix this by changing or deleting the offending citation key in the input file.</para> - </listitem> - </itemizedlist> - <para>Once a reference is added to the database, you might still feel the urge to change it. Be it that you would like to add further keywords or that your personal information, like the reprint status, have changed since. The most straightforward way to change a reference is to retrieve it in RIS format, save it to a file, edit it, and send the updated copy back to where it came from. 
The following sequence of commands achieves this:</para> - <note> - <para>In this example we'll assume that you already know the ID of the reference that you want to change. You'll learn <link linkend="sect-finding-refs">later</link> how to find a reference by all sorts of criteria like authors, keywords and the like. This section also has additional information on the <command moreinfo="none">getref</command> command used here.</para> - </note> - <screen format="linespecific"> -<prompt moreinfo="none">&appname;c: </prompt><userinput moreinfo="none">getref -t ris -o editme.ris :ID:=7</userinput> -2595 byte written to /usr/home/markus/refdb/editme.ris -<prompt moreinfo="none">&appname;c: </prompt> -</screen> - <para>Now you can use your favourite text editor to change the file <filename moreinfo="none">editme.ris</filename> as you see fit. For this exercise we'll just change the reprint status (the RP field) from "NOT IN FILE" to "IN FILE". When you're done, save the file and go back to &appname;c:</para> - <screen format="linespecific"> -<prompt moreinfo="none">&appname;c: </prompt><userinput moreinfo="none">updateref -P editme.ris</userinput> -Updating set 0 successful -1 dataset(s) updated, 0 added, 0 failed -1 dataset(s) sent. -</screen> - <para>Now what was that <option>-P</option> switch good for? This switch tells &appname; that it should only update your personal information of this reference, i.e. the RP, AV, and N1 fields. This is a lot faster than updating the whole reference. It is also more secure as you might have changed the file somewhere else accidentally without noticing. On the other hand, if you e.g. correct a typo you noticed in the title (TI) field, you <emphasis>must not</emphasis> use the <option>-P</option> switch.</para> - </sect3> - </sect2> - <sect2> - <title>Creating a RIS dataset from scratch</title> - <para>As noted previously, you can use any text editor that creates Unix-style line endings (linefeed) to create and edit RIS files. 
&appname; ships a ris-mode for Emacs which makes the task a little more pleasant. Ask your administrator whether this mode is available on your system.</para> - <para>Creating a dataset is basically monkey business. The only intelligence required is to use the correct tags for your chunks of bibliographic information. Use the example RIS file as guidelines for the most common reference types journal article, book chapter, and book. We'll look at a few issues related to these references first.</para> - <para>The first issue is certainly the type of the reference. There are pretty clear cases if you look at the list, but there's some bordercases too which are not covered by the predefined types. You should keep in mind that &appname; does not restrict the fields available for a particular reference type. You can fill any available field for any reference type. The only restriction is that the bibliography styles for most reference types use only a subset of the available fields. E.g. a bibliography entry of a journal article will not show a series editor even if you filled in the A3 field, whereas the bibliography entry of a book chapter might show the series editor if the book the chapter is published in is a part of a series.</para> - <para>The general rule is to use the closest matching type, to be consistent in this decision (all similar bordercases should use the same type), and to use the GEN type if nothing else helps. Most bibliography styles display all available fields for the GEN type.</para> - <para>To people new to bibliographic software, the various levels of titles and authors is often confusing. RIS offers three levels of authors:</para> - <variablelist> - <varlistentry> - <term>AU (synonym: A1)</term> - <listitem> - <para>The author of a publication. 
This is the person (or the persons) responsible for the smallest unit of the publication you're looking at.</para> - </listitem> - </varlistentry> - <varlistentry> - <term>ED (synonym: A2)</term> - <listitem> - <para>The editor of a collection of publications.</para> - </listitem> - </varlistentry> - <varlistentry> - <term>A3</term> - <listitem> - <para>The editor of a series of collections of publications.</para> - </listitem> - </varlistentry> - </variablelist> - <para>Lets consider a few examples. If your reference contains a journal article, published in some scientific journal, the AU fields contain the names of those who wrote the article. The same holds true for the authors of a chapter published in a book like "Methods in Enzymology". The chapter authors would be in the AU fields, the volume editors in the ED field. The editors of the whole "Methods in Enzymology" series of books would be entered in A3 fields. However, if your reference points to one particular volume of "Methods in Enzymology" as a whole, you'd rather put the volume editors in the AU fields. The same logic holds true for the title fields:</para> - <variablelist> - <varlistentry> - <term>TI (synonym: T1)</term> - <listitem> - <para>The title of the smallest unit of publication you're looking at.</para> - </listitem> - </varlistentry> - <varlistentry> - <term>A2</term> - <listitem> - <para>The title of the collection of publications</para> - </listitem> - </varlistentry> + <title>Creating a RIS dataset from scratch</title> + <para>As noted previously, you can use any text editor that creates Unix-style line endings (linefeed) to create and edit RIS files. &appname; ships a ris-mode for Emacs which makes the task a little more pleasant. Ask your administrator whether this mode is available on your system.</para> + <para>Creating a dataset is basically monkey business. The only intelligence required is to use the correct tags for your chunks of bibliographic information. 
Use the example RIS file as guidelines for the most common reference types journal article, book chapter, and book. We'll look at a few issues related to these references first.</para> + <para>The first issue is certainly the type of the reference. There are pretty clear cases if you look at the list, but there's some bordercases too which are not covered by the predefined types. You should keep in mind that &appname; does not restrict the fields available for a particular reference type. You can fill any available field for any reference type. The only restriction is that the bibliography styles for most reference types use only a subset of the available fields. E.g. a bibliography entry of a journal article will not show a series editor even if you filled in the A3 field, whereas the bibliography entry of a book chapter might show the series editor if the book the chapter is published in is a part of a series.</para> + <para>The general rule is to use the closest matching type, to be consistent in this decision (all similar bordercases should use the same type), and to use the GEN type if nothing else helps. Most bibliography styles display all available fields for the GEN type.</para> + <para>To people new to bibliographic software, the various levels of titles and authors is often confusing. RIS offers three levels of authors:</para> + <variablelist> + <varlistentry> + <term>AU (synonym: A1)</term> + <listitem> + <para>The author of a publication. 
This is the person (or the persons) responsible for the smallest unit of the publication you're looking at.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>ED (synonym: A2)</term> + <listitem> + <para>The editor of a collection of publications.</para> + </listitem> + </varlistentry> <varlistentry> - <term>A3</term> - <listitem> - <para>The title of the series of collections of publications</para> - </listitem> - </varlistentry> - </variablelist> - <para>Using our previous example, an article published in "Methods in Enzymology" might have a TI field "An apparatus to turn urine into gold". The T2 field would be the title of the volume, e.g. "Alchemy and related techniques", whereas the T3 field would contain "Methods in Enzymology". However, if your reference points to the "Alchemy and related techniques" volume as a whole, this title would go into the T1 field.</para> - </sect2> - <sect2 id="sect-retrieve-pubmed"> - <title>Retrieving datasets from PubMed</title> - <para>The primary source of reference data in the biomedical field is the PubMed database maintained by the <ulink url="http://www.ncbi.nlm.nih.gov">National Center for Biotechnology Information</ulink>. This section shows the simplest and most common way to retrieve bibliographic information about interesting articles from this database using a web browser (other methods use web service clients or email subscription services, but this is beyond the scope of this tutorial).</para> - <para>After visiting the site with your favourite web browser, select "PubMed" from the drop-down box called "Search" and type a query in the provided field. Something like "Doe J 2002" to find articles published by J. Doe in 2002. After hitting "Enter" you'll receive a list of publications matching your query. 
Select the ones you're interested in by clicking the check box right next to the publication (convenience beating logic, you can also check <emphasis>none</emphasis> of the boxes in order to retrieve <emphasis>all</emphasis> publications). Select "XML" from the drop-down box next to the <guibutton moreinfo="none">Display</guibutton> button and hit the latter. You'll receive the list of the publications in the Pubmed XML format. Now click the <guibutton moreinfo="none">Save</guibutton> button on that page and save the information to a plain-text file, e.g. <filename moreinfo="none">pm001.xml</filename>. You could then return to the search, run a few more queries, and save your results in additional files according to this pattern.</para> - <para>We'll use the Perl script <command moreinfo="none">med2ris.pl</command> to turn our XML data into RIS data. This tool either reads Pubmed data from standard input or from files specified as arguments. The result will be sent to standard output, so you can either view it with a pager or write it to a file.</para> - <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">med2ris.pl < pm001.xml | less</userinput></screen> - <para>This command converts the data in the file <filename moreinfo="none">pm001.xml</filename> and displays the result in a pager.</para> - <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">med2ris.pl pm*.xml > pm.ris</userinput></screen> - <para>This command converts all files that match the pattern <filename moreinfo="none">pm*.xml</filename>, like <filename moreinfo="none">pm001.xml</filename> or <filename moreinfo="none">pmnew.xml</filename>, and writes the resulting RIS datasets to <filename moreinfo="none">pm.ris</filename>.</para> - <para>Now you should add your personal information to each dataset, as outlined <link linkend="sect-global-personal-fields">above</link>. 
Then you could go ahead and add the references to your default database with &appname;c:</para> - <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt>addref pm.ris</screen> - </sect2> - <sect2> - <title>Importing BibTeX datasets</title> - <para>If you have a BibTeX database that you want to import into &appname;, you'll have to convert these data to RIS first. This is again best done by using one of the converters shipped with &appname;. The tool <command moreinfo="none">bib2ris</command> will by default convert all standard BibTeX fields to the RIS equivalents. If you used non-standard fields in your BibTeX database, <command moreinfo="none">bib2ris</command> can be configured to import these too, but this requires additional entries in the <filename moreinfo="none">~/.bib2risrc</filename> configuration file. The manual has all the details, but for the purposes of this tutorial we'll assume that you only use an "ABSTRACT" field as the only additional non-standard field.</para> - <para>Just like &appname;c, bib2ris reads configuration files at startup which you can modify to permanently set some defaults. The syntax of the configuration file is the same as outlined <link linkend="sect-config-files">above</link>, but the only line we have to enter at this time is the following:</para> - <programlisting format="linespecific"> + <term>A3</term> + <listitem> + <para>The editor of a series of collections of publications.</para> + </listitem> + </varlistentry> + </variablelist> + <para>Lets consider a few examples. If your reference contains a journal article, published in some scientific journal, the AU fields contain the names of those who wrote the article. The same holds true for the authors of a chapter published in a book like "Methods in Enzymology". The chapter authors would be in the AU fields, the volume editors in the ED field. The editors of the whole "Methods in Enzymology" series of books would be entered in A3 fields. 
However, if your reference points to one particular volume of "Methods in Enzymology" as a whole, you'd rather put the volume editors in the AU fields. The same logic holds true for the title fields:</para> + <variablelist> + <varlistentry> + <term>TI (synonym: T1)</term> + <listitem> + <para>The title of the smallest unit of publication you're looking at.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>T2</term> + <listitem> + <para>The title of the collection of publications</para> + </listitem> + </varlistentry> + <varlistentry> + <term>T3</term> + <listitem> + <para>The title of the series of collections of publications</para> + </listitem> + </varlistentry> + </variablelist> + <para>Using our previous example, an article published in "Methods in Enzymology" might have a TI field "An apparatus to turn urine into gold". The T2 field would be the title of the volume, e.g. "Alchemy and related techniques", whereas the T3 field would contain "Methods in Enzymology". However, if your reference points to the "Alchemy and related techniques" volume as a whole, this title would go into the T1 field.</para> + </sect3> + <sect3 id="sect-retrieve-pubmed"> + <title>Retrieving datasets from PubMed</title> + <para>The primary source of reference data in the biomedical field is the PubMed database maintained by the <ulink url="http://www.ncbi.nlm.nih.gov">National Center for Biotechnology Information</ulink>. This section shows the simplest and most common way to retrieve bibliographic information about interesting articles from this database using a web browser (other methods use web service clients or email subscription services, but this is beyond the scope of this tutorial).</para> + <para>After visiting the site with your favourite web browser, select "PubMed" from the drop-down box called "Search" and type a query in the provided field. Something like "Doe J 2002" to find articles published by J. Doe in 2002. 
After hitting "Enter" you'll receive a list of publications matching your query. Select the ones you're interested in by clicking the check box right next to the publication (convenience beating logic, you can also check <emphasis>none</emphasis> of the boxes in order to retrieve <emphasis>all</emphasis> publications). Select "XML" from the drop-down box next to the <guibutton moreinfo="none">Display</guibutton> button and hit the latter. You'll receive the list of the publications in the Pubmed XML format. Now click the <guibutton moreinfo="none">Save</guibutton> button on that page and save the information to a plain-text file, e.g. <filename moreinfo="none">pm001.xml</filename>. You could then return to the search, run a few more queries, and save your results in additional files according to this pattern.</para> + <para>We'll use the Perl script <command moreinfo="none">med2ris.pl</command> to turn our XML data into RIS data. This tool either reads Pubmed data from standard input or from files specified as arguments. 
The result will be sent to standard output, so you can either view it with a pager or write it to a file.</para> + <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">med2ris.pl < pm001.xml | less</userinput></screen> + <para>This command converts the data in the file <filename moreinfo="none">pm001.xml</filename> and displays the result in a pager.</para> + <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">med2ris.pl pm*.xml > pm.ris</userinput></screen> + <para>This command converts all files that match the pattern <filename moreinfo="none">pm*.xml</filename>, like <filename moreinfo="none">pm001.xml</filename> or <filename moreinfo="none">pmnew.xml</filename>, and writes the resulting RIS datasets to <filename moreinfo="none">pm.ris</filename>.</para> + <para>Now you should add your personal information to each dataset, as outlined <link linkend="sect-global-personal-fields">above</link>. Then you could go ahead and add the references to your default database with &appname;c:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt>addref pm.ris</screen> + </sect3> + <sect3> + <title>Importing BibTeX datasets</title> + <para>If you have a BibTeX database that you want to import into &appname;, you'll have to convert these data to RIS first. This is again best done by using one of the converters shipped with &appname;. The tool <command moreinfo="none">bib2ris</command> will by default convert all standard BibTeX fields to the RIS equivalents. If you used non-standard fields in your BibTeX database, <command moreinfo="none">bib2ris</command> can be configured to import these too, but this requires additional entries in the <filename moreinfo="none">~/.bib2risrc</filename> configuration file. 
The manual has all the details, but for the purposes of this tutorial we'll assume that you only use an "ABSTRACT" field as the only additional non-standard field.</para> + <para>Just like &appname;c, bib2ris reads configuration files at startup which you can modify to permanently set some defaults. The syntax of the configuration file is the same as outlined <link linkend="sect-config-files">above</link>, but the only line we have to enter at this time is the following:</para> + <programlisting format="linespecific"> nsf_abstract N2 -</programlisting> - <para>This will tell bib2ris to import your non-standard abstract field (this is case-insensitive, so your BibTeX file might use ABSTRACT or Abstract as well) into the N2 RIS field. Use the following command to see the results:</para> - <screen><prompt>~$ </prompt>bib2ris < myrefs.bib | less</screen> - <para>This command will convert the contents of <filename>myrefs.bib</filename> in the current directory and display the result in a pager.</para> - <screen><prompt>~$ </prompt>bib2ris *.bib > myrefs.ris</screen> - <para>This command reads the data from all <filename>*.bib</filename> files in the current directory and redirects the output into the file <filename>myrefs.ris</filename>.</para> - <para>Now there is a little issue with the data generated by bib2ris: they still contain all TeX markup that you may have used in your input data. If you want to use &appname; only to maintain references for LaTeX files, this is probably ok, but if you want to use the data for SGML and XML documents too, it is necessary to strip the TeX markup before adding the references to the database. 
To this end, run the RIS file through the Perl script <command moreinfo="none">tex2mail</command> shipped with &appname;.</para> - <screen><prompt>~$ </prompt>tex2mail -noindent -ragged -linelength 65535 -ris < myrefs.ris > myrefs-notex.ris</screen> - <para>As described in the <link linkend="sect-retrieve-pubmed">previous section</link>, you should now add your personal information and then use &appname;c to add the datasets to your database.</para> - </sect2> - <sect2> - <title>Retrieving datasets from a Z39.50 server</title> - <para>Many libraries allow remote access to their electronic catalogs via the Z39.50 protocol. With a suitable client you can search the catalogs and retrieve the bibliographic information of interesting references to your local computer. For this tutorial we'll use the free client provided in the <ulink url="http://www.indexdata.dk/yaz/">YAZ toolkit</ulink>, although you could use any other client as well. One of the largest libraries accessible via the Z39.50 protocol is the <ulink url="http://www.loc.gov">Library of Congress</ulink>. We'll try to find computer books written by some Mr. Knuth in this library.</para> - <para>The following command connects to the library using the host name "z3950.loc.gov", the port 7090, and the database name "Voyager". All this information is usually provided either online or in a printed pamphlet about electronic catalog access published by libraries offering Z39.50 services:</para> - <screen format="linespecific"> -<prompt>~$ </prompt><userinput>yaz-client z3950.loc.gov:7090/Voyager</userinput> + </programlisting> + <para>This will tell bib2ris to import your non-standard abstract field (this is case-insensitive, so your BibTeX file might use ABSTRACT or Abstract as well) into the N2 RIS field. 
Use the following command to see the results:</para> + <screen><prompt>~$ </prompt>bib2ris < myrefs.bib | less</screen> + <para>This command will convert the contents of <filename>myrefs.bib</filename> in the current directory and display the result in a pager.</para> + <screen><prompt>~$ </prompt>bib2ris *.bib > myrefs.ris</screen> + <para>This command reads the data from all <filename>*.bib</filename> files in the current directory and redirects the output into the file <filename>myrefs.ris</filename>.</para> + <para>Now there is a little issue with the data generated by bib2ris: they still contain all TeX markup that you may have used in your input data. If you want to use &appname; only to maintain references for LaTeX files, this is probably ok, but if you want to use the data for SGML and XML documents too, it is necessary to strip the TeX markup before adding the references to the database. To this end, run the RIS file through the Perl script <command moreinfo="none">tex2mail</command> shipped with &appname;.</para> + <screen><prompt>~$ </prompt>tex2mail -noindent -ragged -linelength 65535 -ris < myrefs.ris > myrefs-notex.ris</screen> + <para>As described in the <link linkend="sect-retrieve-pubmed">previous section</link>, you should now add your personal information and then use &appname;c to add the datasets to your database.</para> + </sect3> + <sect3> + <title>Retrieving datasets from a Z39.50 server</title> + <para>Many libraries allow remote access to their electronic catalogs via the Z39.50 protocol. With a suitable client you can search the catalogs and retrieve the bibliographic information of interesting references to your local computer. For this tutorial we'll use the free client provided in the <ulink url="http://www.indexdata.dk/yaz/">YAZ toolkit</ulink>, although you could use any other client as well. One of the largest libraries accessible via the Z39.50 protocol is the <ulink url="http://www.loc.gov">Library of Congress</ulink>. 
We'll try to find computer books written by some Mr. Knuth in this library.</para> + <para>The following command connects to the library using the host name "z3950.loc.gov", the port 7090, and the database name "Voyager". All this information is usually provided either online or in a printed pamphlet about electronic catalog access published by libraries offering Z39.50 services:</para> + <screen format="linespecific"> + <prompt>~$ </prompt><userinput>yaz-client z3950.loc.gov:7090/Voyager</userinput> Sent initrequest. Connection accepted by target. @@ -729,6 +685,6 @@ <prompt>Z> </prompt> </screen> - <para>Now you can go ahead and type a query. The full query syntax of Z39.50 is beyond the scope of this tutorial, but the following query retrieves entries with the authorname "knuth" and the topic "computer":</para> - <screen format="linespecific"> + <para>Now you can go ahead and type a query. The full query syntax of Z39.50 is beyond the scope of this tutorial, but the following query retrieves entries with the authorname "knuth" and the topic "computer":</para> + <screen format="linespecific"> <prompt>Z> </prompt><userinput>f @and @attr 1=1003 knuth @attr 1=4 @attr 5=1 computer</userinput> Sent searchRequest. @@ -740,5 +696,5 @@ <prompt>Z> </prompt> </screen> - <para>We've found 28 entries that match our search pattern. We could just go ahead and display some or all of them, but we'd like to write them to a file, so we let our client dump all retrieved references to the file <filename moreinfo="none">knuth.loc.usmarc</filename> and make sure the data are retrieved as "usmarc" (again, the library should be able to inform you which formats are available). Other formats acceptable for &appname; are "ukmarc" and "unimarc".</para> + <para>We've found 28 entries that match our search pattern. 
We could just go ahead and display some or all of them, but we'd like to write them to a file, so we let our client dump all retrieved references to the file <filename moreinfo="none">knuth.loc.usmarc</filename> and make sure the data are retrieved as "usmarc" (again, the library should be able to inform you which formats are available). Other formats acceptable for &appname; are "ukmarc" and "unimarc".</para> <screen format="linespecific"> <prompt>Z> </prompt><userinput>set_marcdump knuth.loc.usmarc</userinput> @@ -746,5 +702,5 @@ <prompt>Z> </prompt> </screen> - <para>Now we retrieve all of the matching entries. The <command moreinfo="none">show</command> command uses an argument like X+Y, where X is the record number where the retrieval should start and Y is the number of consecutive records to be retrieved. The data will be displayed on the screen and written to our file in the background.</para> + <para>Now we retrieve all of the matching entries. The <command moreinfo="none">show</command> command uses an argument like X+Y, where X is the record number where the retrieval should start and Y is the number of consecutive records to be retrieved. The data will be displayed on the screen and written to our file in the background.</para> <screen format="linespecific"> <prompt>Z> </prompt><userinput>show 1+28</userinput> @@ -757,17 +713,207 @@ <prompt>Z> </prompt> </screen> - <para>Finally we can leave the client by typing:</para> + <para>Finally we can leave the client by typing:</para> <screen format="linespecific"> <prompt>Z> </prompt><userinput>quit</userinput> <prompt>~$ </prompt> </screen> - <para>If you attempt to open the resulting MARC file with a text editor or display it with a pager, you'll notice a couple of strange characters. MARC is a binary data format which is not supposed to be readable as plain text. 
If you want to display the file in a human-readable form, use the tool <command moreinfo="none">marcdump</command> (this is part of the MARC::Record perl module which is required for the <command moreinfo="none">marc2ris.pl</command> converter shipped with &appname;; if there is no marcdump on your system, ask your administrator):</para> + <para>If you attempt to open the resulting MARC file with a text editor or display it with a pager, you'll notice a couple of strange characters. MARC is a binary data format which is not supposed to be readable as plain text. If you want to display the file in a human-readable form, use the tool <command moreinfo="none">marcdump</command> (this is part of the MARC::Record perl module which is required for the <command moreinfo="none">marc2ris.pl</command> converter shipped with &appname;; if there is no marcdump on your system, ask your administrator):</para> <screen format="linespecific"> <prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">marcdump knuth.loc.usmarc | less</userinput> </screen> - <para>The structure of a MARC record is quite complex. It is divided into a leader, fields with three-digit names, indicators, and subfields. No need to understand it at this point, though.</para> - <para>&appname; ships the Perl script <command moreinfo="none">marc2ris.pl</command> which attempts to convert MARC datasets to the RIS format like this:</para> + <para>The structure of a MARC record is quite complex. It is divided into a leader, fields with three-digit names, indicators, and subfields. 
No need to understand it at this point, though.</para> + <para>&appname; ships the Perl script <command moreinfo="none">marc2ris.pl</command> which attempts to convert MARC datasets to the RIS format like this:</para> <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">marc2ris.pl knuth.loc.usmarc > knuth.loc.ris</userinput></screen> - <para>This will convert the references we downloaded to corresponding references in RIS format which will be written to the file <filename moreinfo="none">knuth.loc.ris</filename>. If you retrieved the data in a different format, use the <option>-t</option> command line option to specify the input file format: "marc21", which is equivalent to USMARC, "unimarc", or "ukmarc". Now you can proceed as described <link linkend="sect-retrieve-pubmed">above</link> and add the contents of this RIS file to your database.</para> + <para>This will convert the references we downloaded to corresponding references in RIS format which will be written to the file <filename moreinfo="none">knuth.loc.ris</filename>. If you retrieved the data in a different format, use the <option>-t</option> command line option to specify the input file format: "marc21", which is equivalent to USMARC, "unimarc", or "ukmarc". 
Now you can proceed as described <link linkend="sect-retrieve-pubmed">above</link> and add the contents of this RIS file to your database.</para> + </sect3> + </sect2> + <sect2 id="sect-create-risx"> + <title>How to create risx datasets</title> + <para>risx datasets basically carry the same information as RIS datasets, but they use an XML format instead of tagged lines.</para> + <sect3> + <title>What a risx dataset looks like</title> + <para>The main advantages of risx are:</para> + <itemizedlist> + <listitem> + <para>The XML format is a good target for transformation of other bibliographic data in SGML or XML formats.</para> + </listitem> + <listitem> + <para>XML can be edited using either a general-purpose editor or, even more conveniently, with any XML editor.</para> + </listitem> + <listitem> + <para>XML datasets can be validated, i.e. checked for completeness and for an appropriate structure.</para> + </listitem> + </itemizedlist> + <para>Now let's have a look at what the same dataset we had in RIS format above would look like in risx:</para> + <programlisting format="linespecific"> +<![CDATA[ +<?xml version="1.0" encoding="UTF-8" ?> +<!DOCTYPE ris PUBLIC "-//Markus Hoenicka//DTD Ris V1.0.2//EN" "http://refdb.sourceforge.net/dtd/risx.dtd"> +<ris> + <entry type="BOOK" citekey="smith1975metalloporphyrins"> + <publication> + <title>Porphyrins and metalloporphyrins</title> + <author> + <lastname>Smith</lastname> + <firstname>K</firstname> + <middlename>M</middlename> + </author> + <pubinfo> + <pubdate> + <date> + <year>1975</year> + </date> + </pubdate> + <city>Amsterdam</city> + <publisher>Elsevier Scientific Publishing Company</publisher> + </pubinfo> + </publication> + <libinfo user="markus"> + <reprint status="NOTINFILE" /> + </libinfo> + <contents> + <keyword>Porphyrins</keyword> + <keyword>Metalloporphyrins</keyword> + <keyword>Spectrophotometry [methods]</keyword> + <keyword>spectroscopy</keyword> + </contents> + </entry> +</ris> +]]> +</programlisting> + 
<para>This is a complete file with just one reference (you could add more <sgmltag class="element">entry</sgmltag> elements for additional references). As you can see, the entry is a lot more verbose compared to RIS due to the spelled-out start and end tags. However, modern XML editors compensate for this verbosity with nifty features like tab completion and automatic insertion of end tags.</para> + <para>You will notice three large blocks of data in this dataset:</para> + <itemizedlist> + <listitem> + <para>The <sgmltag class="element">publication</sgmltag> element contains the bulk of the bibliographic data proper, like the title, the author(s), and the publication date. Simple entry types like the book we see here make do with one level of bibliographic information. Complex types need more than one level. A journal article needs the <sgmltag class="element">part</sgmltag> element for the article proper and the <sgmltag class="element">publication</sgmltag> element for the information related to the journal. A book published as a part of the series needs the <sgmltag class="element">set</sgmltag> element for the series information in addition to the <sgmltag class="element">publication</sgmltag> element.</para> + </listitem> + <listitem> + <para>The <sgmltag class="element">libinfo</sgmltag> element contains the information specific to one &appname; user, like availability information and personal notes. As a matter of fact, a risx dataset can contain an unlimited number of <sgmltag class="element">libinfo</sgmltag> elements, one per user of the system. 
See also the section about <link linkend="sect-global-personal-fields">global and personal fields</link>.</para> + </listitem> + <listitem> + <para>The <sgmltag class="element">contents</sgmltag> element holds the abstract (not shown here) and keywords.</para> + </listitem> + </itemizedlist> + <para>For further information please visit the <ulink url="http://refdb.sourceforge.net/risx/book1.html">documentation of the risx DTD</ulink>.</para> + </sect3> + <sect3> + <title>Writing risx datasets from scratch</title> + <para>Using your favourite XML editor, writing a risx dataset from scratch should not be exceedingly difficult. The editor should prompt you for required elements and attributes, and refuse to enter an invalid structure. See the example datasets shipped with &appname; to get an idea what different entry types should look like.</para> + </sect3> + <sect3> + <title>Transforming SGML and XML bibliographic data to risx</title> + <para>This topic is somewhat beyond the scope of this introductory tutorial, but if you're familiar with SGML or XML transformations in general, this should not be too hard either. Each input bibliographic format will require a custom DSSSL (for SGML data) or XSLT (for XML data) stylesheet that transforms the data to risx.</para> + </sect3> + <sect3> + <title>Validating risx datasets</title> + <para>If you write risx datasets from scratch or develop your own stylesheets for SGML/XML transformations, it is strongly recommended to validate the results of your laborious efforts. &appname;d uses a non-validating parser to map the risx data to the appropriate database columns. If your input data are invalid, the results might not be to your liking. Two tools come in handy to validate your input data:</para> + <variablelist> + <varlistentry> + <term>onsgmls</term> + <listitem> + <para>This tool is part of the OpenJade package of SGML/XML tools. 
The following command can be used to validate a risx document:</para> + <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">onsgmls -wxml -s /usr/local/share/refdb/declarations/xml.dcl risxrefs.xml</userinput></screen> + </listitem> + </varlistentry> + <varlistentry> + <term>xmllint</term> + <listitem> + <para>This tool is part of the libxml2 package. Use it like this to validate a risx document:</para> + <screen format="linespecific"><prompt moreinfo="none">~$ </prompt><userinput moreinfo="none">xmllint --valid --noout risxrefs.xml</userinput></screen> + </listitem> + </varlistentry> + </variablelist> + </sect3> + </sect2> + <sect2 id="sect-global-personal-fields"> + <title>Global and personal fields</title> + <para>&appname; differs from other reference management tools because a main goal of its design is to encourage people to share their references. However, you may have figured from the tag list above that some of these entries only make sense if they can be maintained by each user individually. This is precisely the approach used by &appname;: The "hard" bibliographic data are global and identical for each user. The "soft" personal data, which are the only ones likely to change after the reference was added anyway, are maintained for each user individually. These personal fields are:</para> + <itemizedlist> + <listitem> + <para>The reprint status (RP)</para> + </listitem> + <listitem> + <para>The availability field (AV)</para> + </listitem> + <listitem> + <para>The notes field (N1)</para> + </listitem> + </itemizedlist> + <para>Even if you use one of the import filters described below or if you use RIS files exported from other bibliographic software, you should take the time to fill these fields with useful values. 
If you don't specify values, the AV and N1 field will be blank (this is ok), and the RP field will have the default value "NOT IN FILE".</para> + </sect2> + <sect2> + <title>Character encodings</title> + <para>One seemingly intimidating detail about reference data is the character encoding issue. At the lowest level, a computer doesn't understand anything but two states of a bit: off and on, usually represented as 0 (zero) and 1 (one). Concatenating several bits still doesn't make a text, but a series of binary numbers at best. This is why even text strings are represented as numbers in a computer's memory. Simply put, a character encoding is a lookup table that tells a computer which character to print if it encounters a particular binary number in a byte sequence that represents a text. The well-known ASCII encoding is understood by most computers but covers only 127 characters. Other encodings like Latin-1 contain all ASCII characters plus many special characters used in European languages. Still other encodings are far more versatile in that they allow to encode all characters used by recent and extinct languages. These are the various forms of the Unicode character encodings.</para> + <para>Although many encodings are known by several names, each character encoding has a preferred name which is usually identical with the MIME encoding name (the one you sometimes see in the header of emails). The names are case-insensitive, but otherwise the spelling must match precisely. E.g. UTF-16 and utf-16 are both recognized, whereas utf16 is incorrect.</para> + <para>Now where do these character encodings come into play? First of all, your reference database uses one particular character encoding, set by your administrator. All data that you add to this database will be converted to that encoding, and all data that you retrieve from this database will have to be converted from that encoding if necessary. 
To find out what encoding your database uses, run this command in &appname;c:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>whichdb</userinput> +Current database: refs +Number of references: 34 +Highest reference ID: 34 +Number of notes: 0 +Highest note ID: 0 +Encoding: UTF-8 +Database type: risx +Database server: pgsql +Created: 2004-02-07 20:39:02 UTC +Using refdb version: 0.9.4-pre6 +Last modified: 2004-02-09 21:43:10 UTC +</screen> + <para>In this example, the database uses the UTF-8 encoding, one of the most versatile Unicode encodings. Now, how do you convert your input data to e.g. UTF-8? Fortunately you don't have to, at least in most cases, as &appname; does this for you on the fly. It just needs to know what encoding your input data use. The two input data formats employ two different ways to specify the character encoding:</para> + <variablelist> + <varlistentry> + <term>RIS</term> + <listitem> + <para>RIS data do not have a built-in mechanism to record the character encoding. You will have to tell &appname; explicitly. You do this by using the <option>-E encoding</option> option with the <link linkend="sect-add-update-refs">addref</link> command. Allowed are all encodings that your operating system can deal with. The most common examples are UTF-8, ASCII, and ISO-8859-1 through -15 (the various character sets for European languages).</para> + </listitem> + </varlistentry> + <varlistentry> + <term>risx</term> + <listitem> + <para>As shown in the <link linkend="sect-create-risx">risx example above</link>, each file containing risx data should specify the encoding in the processing instructions (the very first line of an XML file). Allowed are only four encodings: UTF-8, UTF-16, ASCII, and ISO-8859-1.</para> + </listitem> + </varlistentry> + </variablelist> + <para>After you've added your data to the database, you're not yet done with encodings. 
The same issue pops up when you <link linkend="sect-finding-refs">retrieve datasets from the database</link>. By default, &appname; sends the data using the same encoding as the database uses. However, you can retrieve the data using any encoding that your platform supports.</para> + </sect2> + <sect2 id="sect-add-update-refs"> + <title>How to add and update references</title> + <para>In most cases you have a new set of references and want to add them to your database. Assuming your RIS references are stored in a file <filename moreinfo="none">newrefs.ris</filename> in the present working directory, all you need to do is:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref newrefs.ris</userinput></screen> + <para>To simply add the references in the example RIS file, use this command:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref /usr/local/share/&appname;/examples/testrefs.ris</userinput></screen> + <para>We have not used the <option>-E</option> option to select an encoding here, as the example data uses the ISO-8859-1 encoding which we have set as the default in our config file. If the input data were encoded in UTF-8, we'd do the following instead:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref -E UTF-8 /usr/local/share/&appname;/examples/testrefs.ris</userinput></screen> + <para>If you have references in the risx format instead, you'll have to tell &appname; so. 
Do this with the <option>-t risx</option> switch:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref -t risx newrefs.xml</userinput></screen> + <para>To add the example risx data, use this command:</para> + <screen format="linespecific"><prompt moreinfo="none">&appname;c: </prompt><userinput>addref -t risx /usr/local/share/&appname;/examples/testrefs.xml</userinput></screen> + <para>Remember that risx data carry their encoding information in the processing instructions, so there is no need for using the <option>-E</option> option.</para> + <para>&appname;c will try to add the references stored in these files to your default database. The diagnostic messages will be displayed in your pager, so if you send a dozen or so references it might take a few seconds until the results are displayed. You'll see a message for each reference found in the input file: Which ID was assigned, which citation key was used (or generated if you didn't specify one), and whether the operation was successful. Adding references usually fails only for two reasons:</para> + <itemizedlist> + <listitem> + <para>The input data were not the type of data that &appname; expected. Either the data were simply corrupt, or there was a mismatch between the data type and the value of the <option>-t</option> option.</para> + </listitem> + <listitem> + <para>A citation key was specified in the reference but the same citation key already exists in the database. &appname; will refuse to accept the reference because citation keys <emphasis>must</emphasis> be unique. You can fix this by changing or deleting the offending citation key in the input file.</para> + </listitem> + </itemizedlist> + <para>Once a reference is added to the database, you might still feel the urge to change it. Be it that you would like to add further keywords or that your personal information, like the reprint status, have changed since. 
The most straightforward way to change a reference is to retrieve it in either RIS or risx format, save it to a file, edit it, and send the updated copy back to where it came from. The following sequence of commands shows this, assuming we want to use the RIS format:</para> + <note> + <para>In this example we'll assume that you already know the ID of the reference that you want to change. You'll learn <link linkend="sect-finding-refs">later</link> how to find a reference by all sorts of criteria like authors, keywords and the like. This section also has additional information on the <command moreinfo="none">getref</command> command used here.</para> + </note> + <screen format="linespecific"> +<prompt moreinfo="none">&appname;c: </prompt><userinput moreinfo="none">getref -t ris -o editme.ris :ID:=7</userinput> +2595 byte written to /usr/home/markus/refdb/editme.ris +<prompt moreinfo="none">&appname;c: </prompt> +</screen> + <para>Now you can use your favourite text editor to change the file <filename moreinfo="none">editme.ris</filename> as you see fit. For this exercise we'll just change the reprint status (the RP field) from "NOT IN FILE" to "IN FILE". When you're done, save the file and go back to &appname;c:</para> + <screen format="linespecific"> +<prompt moreinfo="none">&appname;c: </prompt><userinput moreinfo="none">updateref -P editme.ris</userinput> +Updating input set 1 successful +1 dataset(s) updated, 0 added, 0 failed +1 dataset(s) sent. +</screen> + <para>Now what was that <option>-P</option> switch good for? This switch tells &appname; that it should only update your personal information of this reference, i.e. the RP, AV, and N1 fields. This is a lot faster than updating the whole reference. It is also more secure as you might have changed the file somewhere else accidentally without noticing. On the other hand, if you e.g. 
correct a typo you noticed in the title (TI) field, you <emphasis>must not</emphasis> use the <option>-P</option> switch.</para> </sect2> </sect1> @@ -795,9 +941,10 @@ <prompt>refdbc: </prompt><userinput>getref -h</userinput> Displays the result of a database search. -Syntax: getref [-c command] [-d database] [-h] [-o outfile] [-O outfile][-P] [-R pdfroot] [-s format] [-S tag] [-t output-format] {search-string|-f infile} -Search-string: {:XY:{<|=|!=|>}{unix-regexp}} [AND|OR|AND NOT] [...] +Syntax: getref [-c command] [-d database] [-E encoding] [-h] [-o outfile] [-O outfile][-P] [-R pdfroot] [-s format] [-S tag] [-t output-format] {search-string|-f infile} +Search-string: {:XY:{<|=|~|!=|!~|>}{string|regexp}} [AND|OR|AND NOT] [...] where XY specifies the field to search in Options: -c command pipe the output through command -d database specify the database to work with + -E encoding set the output character encoding -h prints this mini-help -o outfile save the output in outfile (overwrite) @@ -807,5 +954,5 @@ -s format specify fields for screen or style for DocBook output -S tag sort output by tag ID (default) or PY - -t output-format display as format scrn, html, db31, teix, ris, or bibtex + -t output-format display as format scrn, html, xhtml, db31, db31x, teix, ris, risx, or bibtex -f infile use the saved search line in file infile All other arguments are interpreted as the search string. @@ -850,5 +997,5 @@ </screen> <para>As all references have an ID value greater than 0, this command catches them all. But beware, this could literally be thousands of references, so don't keep your server busy just for fun.</para> - <para>In most cases you have some idea about the reference you're looking for. Either you know the name of an author, or when the document was published. Maybe you know a word or a phrase from the title, or you want to use keywords to look up references about a particular topic. 
The <command moreinfo="none">getref</command> query language allows you to use all data fields in any combination to retrieve references. Let's first have a look at what fields are available besides the ID field we already know.</para> + <para>In most cases you have some idea about the reference you're looking for. Either you know the name of an author, or when the document was published. Maybe you know a word or a phrase from the title, or you want to use keywords to look up references about a particular topic. The <command moreinfo="none">getref</command> query language allows you to use all data fields in any combination to retrieve references. Let's first have a look at what fields are available besides the ID field we saw above.</para> <variablelist> <varlistentry> @@ -997,8 +1144,23 @@ </varlistentry> </variablelist> + <para>In addition to the above field specifiers, there are a few that allow to retrieve references based on extended notes attached to them (see <link linkend="sect-adding-extended-notes">below</link> for an explanation of extended notes):</para> + <variablelist> + <varlistentry> + <term>:NID:</term> + <listitem> + <para>The ID of an extended note.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>:NCK:</term> + <listitem> + <para>The alphanumeric key of an extended note.</para> + </listitem> + </varlistentry> + </variablelist> </sect3> <sect3> <title>Doing comparisons</title> - <para>We have to distinguish between numeric and alphanumeric fields, as indicated in the list above. This determines which comparison operators are available for that particular field. Alphanumeric fields can use the operators "=" (equality) and "!=" (non-equality). Numeric fields can use the operators "<" (less than) and ">" (greater than) in addition to the former. The comparison of numeric fields should be straightforward, but the comparison of alphanumeric field needs a little further elucidation. 
Alphanumeric comparisons use a Unix-style <emphasis>regular expression</emphasis> on the right-hand side of the operator. The query matches if the regular expression finds a match somewhere in the text stored in the requested field. That is, the comparison ":N1:=warts" would return a result if the character sequence "warts" is somewhere in the text stored in the notes (N1) field.</para> + <para>We have to distinguish between numeric and alphanumeric fields, as indicated in the list above. This determines which comparison operators are available for that particular field. Alphanumeric fields can use the operators "=" (literal equality) and "!=" (literal non-equality) as well as "~" (regular expression equality) and "!~" (regular expression non-equality). Numeric fields can use the operators "=" (equality), "!=" (non-equality), "<" (less than), and ">" (greater than). The comparison of numeric fields should be straightforward, but the comparison of alphanumeric field needs a little further elucidation. Alphanumeric comparisons use either literal strings or <emphasis>regular expressions</emphasis> on the right-hand side of the operator. In the first case, the query matches if the whole string in the field indicated on the left is identical ("=") or not identical ("!=") to the string on the right. In the second case, the query matches if the regular expression finds a match somewhere in the text stored in the requested field. That is, the comparison ":N1:~'warts'" would return a result if the character sequence "warts" is somewhere in the text stored in the notes (N1) field. On the other hand, the comparison ":N1:='warts' would match only if a dat... [truncated message content] |
From: Markus H. <mho...@us...> - 2004-02-10 23:53:31
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29453 Modified Files: noteshandler.c Log Message: added nongeek_offset, fixed return message Index: noteshandler.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/noteshandler.c,v retrieving revision 1.14 retrieving revision 1.15 diff -u -U2 -r1.14 -r1.15 --- noteshandler.c 29 Jan 2004 01:35:27 -0000 1.14 +++ noteshandler.c 10 Feb 2004 23:49:49 -0000 1.15 @@ -42,4 +42,5 @@ extern int n_log_level; +extern int nongeek_offset; extern struct BSTRING connerr; extern struct BSTRING outomem; @@ -988,5 +989,16 @@ else { my_dbi_conn_unlock(ptr_andata->conn); - sprintf(sql_command, "Adding set "ULLSPEC" successful\n", (unsigned long long)(ptr_andata->set_count)); + + if (ptr_andata->create_new) { + sprintf(sql_command, "Adding input set "ULLSPEC" successful\n", (unsigned long long)(ptr_andata->set_count + nongeek_offset)); + (ptr_andata->added_count)++; + } + else { + sprintf(sql_command, "Updating input set "ULLSPEC" successful\n", (unsigned long long)(ptr_andata->set_count + nongeek_offset)); + (ptr_andata->updated_count)++; + } + + (ptr_andata->set_count)++; + if ((new_msgpool = mstrcat(ptr_andata->msgpool, sql_command, &(ptr_andata->msgpool_len), 0)) == NULL) { LOG_PRINT(LOG_WARNING, outomem.text); @@ -997,12 +1009,4 @@ ptr_andata->msgpool = new_msgpool; } - /* ToDo: see whether updated or added */ - if (ptr_andata->create_new) { - (ptr_andata->added_count)++; - } - else { - (ptr_andata->updated_count)++; - } - (ptr_andata->set_count)++; } } |
From: Markus H. <mho...@us...> - 2004-02-09 20:55:40
|
Update of /cvsroot/refdb/tutorial In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9744 Log Message: initial import Status: Vendor Tag: vendor Release Tags: start N tutorial/refdbtutorial.sgml No conflicts created by this import ***** Bogus filespec: - ***** Bogus filespec: Imported ***** Bogus filespec: sources |
From: Markus H. <mho...@us...> - 2004-02-08 00:39:45
|
Update of /cvsroot/refdb/refdb/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14670/scripts Modified Files: refdbnd.in Log Message: check for existence of document before attempting to extract values Index: refdbnd.in =================================================================== RCS file: /cvsroot/refdb/refdb/scripts/refdbnd.in,v retrieving revision 1.7 retrieving revision 1.8 diff -u -U2 -r1.7 -r1.8 --- refdbnd.in 30 Dec 2003 23:58:15 -0000 1.7 +++ refdbnd.in 8 Feb 2004 00:36:39 -0000 1.8 @@ -35,30 +35,32 @@ check_existing_doc() { -# guess values from existing file - xfile=$basename - - if grep "^<!DOCTYPE.*DTD DocBook V3.1//EN" $basename; then - doctype="db31" - pubtype=`grep "DTD DocBook V3.1//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` - else - if grep "^<!DOCTYPE.*DTD DocBook V4.0//EN" $basename; then - doctype="db40" - pubtype=`grep "DTD DocBook V4.0//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` - else - if grep "^<!DOCTYPE.*DTD DocBook V4.1//EN" $basename; then - doctype="db41" - pubtype=`grep "DTD DocBook V4.1//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + if [ -e $basename ]; then + # guess values from existing file + xfile=$basename + + if grep "^<!DOCTYPE.*DTD DocBook V3.1//EN" $basename; then + doctype="db31" + pubtype=`grep "DTD DocBook V3.1//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + else + if grep "^<!DOCTYPE.*DTD DocBook V4.0//EN" $basename; then + doctype="db40" + pubtype=`grep "DTD DocBook V4.0//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` else - if grep "^<!DOCTYPE.*DTD DocBook XML V4.1.2//EN" $basename; then - doctype="db41x" - pubtype=`grep "DTD DocBook XML V4.1.2//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + if grep "^<!DOCTYPE.*DTD DocBook V4.1//EN" $basename; then + doctype="db41" + pubtype=`grep "DTD DocBook V4.1//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` else - if grep "^<!DOCTYPE.*DTD DocBook XML V4.2//EN" $basename; then - 
doctype="db42x" - pubtype=`grep "DTD DocBook XML V4.2//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + if grep "^<!DOCTYPE.*DTD DocBook XML V4.1.2//EN" $basename; then + doctype="db41x" + pubtype=`grep "DTD DocBook XML V4.1.2//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` else - if grep "^<!DOCTYPE.*TEI P4//DTD Main DTD Driver File//EN" $basename; then - doctype="teix" - pubtype=`grep "TEI P4//DTD Main DTD Driver File//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + if grep "^<!DOCTYPE.*DTD DocBook XML V4.2//EN" $basename; then + doctype="db42x" + pubtype=`grep "DTD DocBook XML V4.2//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + else + if grep "^<!DOCTYPE.*TEI P4//DTD Main DTD Driver File//EN" $basename; then + doctype="teix" + pubtype=`grep "TEI P4//DTD Main DTD Driver File//EN" $basename | sed 's/^<!DOCTYPE \(.*\) PUBLIC.*/\1/'` + fi fi fi @@ -66,6 +68,6 @@ fi fi + basename=${xfile%%.*} fi - basename=${xfile%%.*} } @@ -91,7 +93,6 @@ read basename - if [ ! "$basename" = "db31" ] && [ ! "$basename" = "db40" ] && [ ! "$basename" = "db41" ] && [ ! "$basename" = "db41x" ] && [ ! "$basename" = "db42x" ] && [ ! "$basename" = "teix" ]; then - check_existing_doc - fi + + check_existing_doc if [ -z $doctype ]; then @@ -148,10 +149,7 @@ # all arguments were specified on the command line basename=$1 - if [ ! "$basename" = "db31" ] && [ ! "$basename" = "db40" ] && [ ! "$basename" = "db41" ] && [ ! "$basename" = "db41x" ] && [ ! "$basename" = "db42x" ] && [ ! "$basename" = "teix" ]; then - check_existing_doc - else - doctype=$2 - pubtype=$3 - fi + doctype=$2 + pubtype=$3 + check_existing_doc database=$4 style=$5 |
From: Markus H. <mho...@us...> - 2004-02-08 00:39:01
|
Update of /cvsroot/refdb/refdb/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14505/scripts Modified Files: refdb.dump refdb.dump.pgsql refdb.dump.sqlite Log Message: added BMC to journal words Index: refdb.dump =================================================================== RCS file: /cvsroot/refdb/refdb/scripts/refdb.dump,v retrieving revision 1.5 retrieving revision 1.6 diff -u -U2 -r1.5 -r1.6 --- refdb.dump 9 Sep 2003 22:24:26 -0000 1.5 +++ refdb.dump 8 Feb 2004 00:36:03 -0000 1.6 @@ -737,4 +737,5 @@ INSERT INTO t_journal_words (name) VALUES ('BJOG'); INSERT INTO t_journal_words (name) VALUES ('CRYOBIOLOGY'); +INSERT INTO t_journal_words (name) VALUES ('BMC'); UNLOCK TABLES; Index: refdb.dump.pgsql =================================================================== RCS file: /cvsroot/refdb/refdb/scripts/refdb.dump.pgsql,v retrieving revision 1.5 retrieving revision 1.6 diff -u -U2 -r1.5 -r1.6 --- refdb.dump.pgsql 9 Sep 2003 22:24:26 -0000 1.5 +++ refdb.dump.pgsql 8 Feb 2004 00:36:03 -0000 1.6 @@ -696,4 +696,5 @@ INSERT INTO t_journal_words (name) VALUES ('BJOG'); INSERT INTO t_journal_words (name) VALUES ('CRYOBIOLOGY'); +INSERT INTO t_journal_words (name) VALUES ('BMC'); CREATE GROUP refdbuser; Index: refdb.dump.sqlite =================================================================== RCS file: /cvsroot/refdb/refdb/scripts/refdb.dump.sqlite,v retrieving revision 1.4 retrieving revision 1.5 diff -u -U2 -r1.4 -r1.5 --- refdb.dump.sqlite 9 Sep 2003 22:24:26 -0000 1.4 +++ refdb.dump.sqlite 8 Feb 2004 00:36:03 -0000 1.5 @@ -691,3 +691,4 @@ INSERT INTO t_journal_words (name) VALUES ('BJOG'); INSERT INTO t_journal_words (name) VALUES ('CRYOBIOLOGY'); +INSERT INTO t_journal_words (name) VALUES ('BMC'); |
From: Markus H. <mho...@us...> - 2004-02-08 00:37:56
|
Update of /cvsroot/refdb/refdb/etc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14357/etc Modified Files: refdbibrc Log Message: added encoding variable Index: refdbibrc =================================================================== RCS file: /cvsroot/refdb/refdb/etc/refdbibrc,v retrieving revision 1.4 retrieving revision 1.5 diff -u -U2 -r1.4 -r1.5 --- refdbibrc 13 Dec 2002 20:33:52 -0000 1.4 +++ refdbibrc 8 Feb 2004 00:34:59 -0000 1.5 @@ -76,3 +76,8 @@ stylespecdir . +# The character encoding that should be used for the bibliography output. +# Use an encoding that is supported by libiconv. See "man iconv_open" for +# a list of the available encodings on your system +#encoding UTF-8 + # end of refdbibrc \ No newline at end of file |
From: Markus H. <mho...@us...> - 2004-02-08 00:35:44
|
Update of /cvsroot/refdb/refdb/doc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13835 Modified Files: refdb-manual-chapter1.sgml refdb-manual-chapter10.sgml refdb-manual-chapter11.sgml refdb-manual-chapter12.sgml refdb-manual-chapter13.sgml refdb-manual-chapter14.sgml refdb-manual-chapter2.sgml refdb-manual-chapter4.sgml refdb-manual-chapter5.sgml refdb-manual-chapter7.sgml refdb-manual-chapter8.sgml Log Message: updated for 0.9.4 Index: refdb-manual-chapter1.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter1.sgml,v retrieving revision 1.11 retrieving revision 1.12 diff -u -U2 -r1.11 -r1.12 --- refdb-manual-chapter1.sgml 30 Dec 2003 23:51:36 -0000 1.11 +++ refdb-manual-chapter1.sgml 8 Feb 2004 00:32:45 -0000 1.12 @@ -70,5 +70,5 @@ </listitem> <listitem> - <para>The query language is fairly simple yet powerful. You can search in all fields in the database. You can use the Boolean operators <wordasword>AND</wordasword>, <wordasword>OR</wordasword>, <wordasword>NOT</wordasword> to combine search expressions. You can use brackets <wordasword>()</wordasword> to group search expressions. All alphanumeric fields (i.e. most except e.g. the publication year) treat the search string as a Unix-style regular expression which gives you enormous flexibility in your search strategies. The readline library reads the user input in all interactive clients. You can recall any previous search strings with a few keystrokes and re-run them or modify them as needed.</para> + <para>The query language is fairly simple yet powerful. You can search in all fields in the database. You can use the Boolean operators <wordasword>AND</wordasword>, <wordasword>OR</wordasword>, <wordasword>NOT</wordasword> to combine search expressions. You can use brackets <wordasword>()</wordasword> to group search expressions. You can use either literal matches or regular expressions in all alphanumeric fields (i.e. 
most except e.g. the publication year). This gives you enormous flexibility in your search strategies. The readline library reads the user input in all interactive clients. You can recall any previous search strings with a few keystrokes and re-run them or modify them as needed.</para> </listitem> <listitem> @@ -77,4 +77,7 @@ </listitem> <listitem> + <para>&appname; supports all character encodings available on your platform. While the available encodings in the database may be limited by the database engine, &appname; can convert incoming data as well as exported data with only few limitations.</para> + </listitem> + <listitem> <para>&appname; handles the AV field of the RIS input files in a very flexible way. You can specify a path to a PDF or Postscript version of the document on your harddrive or on the web. The local path can be split into a variable and a static part. The variable part can be specified on the command line e.g. if you access your data remotely via a NFS-mounted share.</para> </listitem> Index: refdb-manual-chapter10.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter10.sgml,v retrieving revision 1.18 retrieving revision 1.19 diff -u -U2 -r1.18 -r1.19 --- refdb-manual-chapter10.sgml 30 Dec 2003 23:51:36 -0000 1.18 +++ refdb-manual-chapter10.sgml 8 Feb 2004 00:32:45 -0000 1.19 @@ -34,5 +34,4 @@ <arg>-d <replaceable>database</replaceable></arg> <arg>-e <replaceable>log-destination</replaceable></arg> - <arg>-E <replaceable>encoding</replaceable></arg> <arg>-F <replaceable>fields</replaceable></arg> <arg>-g <replaceable>deffile</replaceable></arg> @@ -61,5 +60,4 @@ <arg choice="req">-d <replaceable>database</replaceable></arg> <arg>-e <replaceable>log-destination</replaceable></arg> - <arg>-E <replaceable>encoding</replaceable></arg> <arg>-F <replaceable>fields</replaceable></arg> <arg>-g <replaceable>deffile</replaceable></arg> @@ -85,5 +83,4 @@ <para>Remember that you 
don't have to specify all command-line options each time if you define the values in <link linkend="sect1-mystery-init-files">.&appname;crc</link>.</para> <para>Use the <option>-d</option> option to specify the database that you want to work with. In an interactive session you can also set and change the default database with the <link linkend="app-c-command-selectdb"><command>selectdb</command> command</link>.</para> - <para>The encoding specified with the <option>-E</option> option is used by the HTML output of the <link linkend="app-c-command-getref"><command moreinfo="none">getref</command></link> command.</para> <para>The <option>-F</option> option specifies the default fields that are to be displayed in a <link linkend="app-c-command-getref"><command moreinfo="none">getref</command></link> query.</para> <para>The <option>-g</option> option can be used to add some default fields to all references that are added or updated. The argument <replaceable>deffile</replaceable> is the filename of a <link linkend="sect1-ris-format">RIS file</link> containing these additional fields. &appname;c first tries the filename as is, so it should be a valid relative or absolute path. If the file is not found, &appname; looks for the file in <filename><envar>$HOME</envar>/</filename>. The command aborts if the file cannot be found.</para> @@ -132,4 +129,24 @@ </row> <row> + <entry>fromencoding</entry> + <entry>ISO-8859-1</entry> + <entry>The default encoding of RIS input data. You can use any encoding that your local libiconv implementation supports.</entry> + </row> + <row> + <entry>logdest</entry> + <entry>file</entry> + <entry>Where the log output should be written to. Use either stderr, syslog, or file. 
For the latter to work, the logfile variable must be set appropriately</entry> + </row> + <row> + <entry>logfile</entry> + <entry><filename moreinfo="none">/var/log/refdbc.log</filename></entry> + <entry>The full path of a custom log file.</entry> + </row> + <row> + <entry>loglevel</entry> + <entry>info</entry> + <entry>Set the level of log information that you would receive. Possible values, in order of increasing verbosity, are: emerg, alert, crit, err, warning, notice, info, debug</entry> + </row> + <row> <entry>pager</entry> <entry>stdout</entry> @@ -142,4 +159,9 @@ </row> <row> + <entry>pdfroot</entry> + <entry>(none)</entry> + <entry>This value will be used as the root of the paths to PDF or Postscript offprints that can be specified with the AV field in a RIS dataset. The path should not rely on shell expansion, e.g. use <filename>/home/me/literature/</filename> instead of <filename>~/literature/</filename>. The <link linkend="sect1-pdfroot">pdfroot</link> allows you to shorten the paths that you enter for each dataset and to maintain a certain portability if you have to move the offprints to a different directory or want to access them remotely. The html output routine will concatenate the relative path of each dataset with the pdfroot to construct the link to the offprint. Instead of a local path name you can specify an URL starting with http:// or ftp:// if your offprints are accessible through a web server or ftp server.</entry> + </row> + <row> <entry>port</entry> <entry>9734</entry> @@ -157,4 +179,9 @@ </row> <row> + <entry>toencoding</entry> + <entry>(none)</entry> + <entry>The default encoding of output data. You can use any encoding that your local libiconv implementation supports. 
If this value is not set, the encoding of the database will be used without conversion.</entry> + </row> + <row> <entry>username</entry> <entry>login name</entry> @@ -166,9 +193,4 @@ <entry>Set this to t if you prefer verbose error messages.</entry> </row> - <row> - <entry>pdfroot</entry> - <entry>(none)</entry> - <entry>This value will be used as the root of the paths to PDF or Postscript offprints that can be specified with the AV field in a RIS dataset. The path should not rely on shell expansion, e.g. use <filename>/home/me/literature/</filename> instead of <filename>~/literature/</filename>. The <link linkend="sect1-pdfroot">pdfroot</link> allows you to shorten the paths that you enter for each dataset and to maintain a certain portability if you have to move the offprints to a different directory or want to access them remotely. The html output routine will concatenate the relative path of each dataset with the pdfroot to construct the link to the offprint. Instead of a local path name you can specify an URL starting with http:// or ftp:// if your offprints are accessible through a web server or ftp server.</entry> - </row> </tbody> </tgroup> @@ -288,4 +310,5 @@ <command>addnote</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-h</arg> <group choice="opt" rep="norepeat"> @@ -305,4 +328,5 @@ <para>The <option>-c</option> switch allows to specify a <command>command</command>. &appname;c will open a pipe to the first program in the command and send the output to this program's stdin. 
The command may be any valid command that you can run in your shell, so further plumbing is perfectly legal.</para> <para>Use the <option>-d</option> option to specify the database that you want to work with if it is different from the currently selected database.</para> + <para>Select an input character encoding with the <option>-E</option> option if it is different from the default UTF-8.</para> <para>The <option>-h</option> option displays a short command syntax and description, then returns to the command prompt. </para> <para>The <option>-o</option> and <option>-O</option> switches allow to redirect the output to <filename>outfile</filename> instead of the default screen display. The two options differ in the way they handle an existing <filename>outfile</filename>. <option>-o</option> will replace the existing file, while <option>-O</option> will append to the existing file. If <filename>outfile</filename> cannot be opened with the proper permissions, the output is sent to stdout instead.</para> @@ -323,4 +347,5 @@ <command>addref</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-g <replaceable>deffile</replaceable></arg> <arg>-h</arg> @@ -343,4 +368,5 @@ <para>The <option>-c</option> switch allows to specify a <command>command</command>. &appname;c will open a pipe to the first program in the command and send the output to this program's stdin. The command may be any valid command that you can run in your shell, so further plumbing is perfectly legal. This feature may e.g. be used to filter the output with grep for the error messages, dropping all success messages. 
This is of course not intended to make your world look grey and dull, but to make it easier to spot the (hopefully zero or few) error messages inbetween all those success messages.</para> <para>Use the <option>-d</option> option to specify the database that you want to work with.</para> + <para>Select an input character encoding with the <option>-E</option> option if it is different from the default UTF-8. RIS datasets can use any encoding that your local libiconv supports (see <command moreinfo="none">man iconv_open</command> for a list of available encodings), except UTF-16 and UTF-32. RISX datasets carry the encoding in the processing instructions, therefore this option is ignored.</para> <para>You can use two different input file formats with this command. The default format is the <link linkend="sect1-ris-format">tagged RIS format</link>. Use <option>-t risx</option> to use XML files according to the RISX DTD as input data.</para> <para>The <option>-g</option> option can be used in conjunction with RIS data to add some default fields to all references that are added with this command. The argument <replaceable>deffile</replaceable> is the filename of a <link linkend="sect1-ris-format">RIS file</link> containing these additional fields. &appname;c first tries the filename as is, so it should be a valid relative or absolute path. If the file is not found, &appname; looks for the file in <filename><envar>$HOME</envar>/</filename>. 
The command aborts if the file cannot be found.</para> @@ -361,7 +387,7 @@ <simplesect> <title>Example</title> - <screen><prompt>&appname;c: </prompt><userinput>addref -U doe -g .refdbdefault.ris foo.ris</userinput></screen> + <screen><prompt>&appname;c: </prompt><userinput>addref -U doe -g .refdbdefault.ris -E ISO-8859-1 foo.ris</userinput></screen> <screen><prompt>$ </prompt><userinput>refdbc -C addref -U doe -g .refdbdefault.ris -d db1 < foo.ris</userinput></screen> - <para>These commands will add the references in <filename moreinfo="none">foo.ris</filename>. The references will be associated with the user <quote>doe</quote>. Every reference will use the specified values in <filename moreinfo="none">.refdbdefault.ris</filename> in the appropriate fields. In the first (interactive) command, the active database will be used. In the second (non-interactive) command, the database has to be specified explicitly with the <option>-d</option> option.</para> + <para>These commands will add the references in <filename moreinfo="none">foo.ris</filename>. The references will be associated with the user <quote>doe</quote>. Every reference will use the specified values in <filename moreinfo="none">.refdbdefault.ris</filename> in the appropriate fields. In the first (interactive) command, the active database will be used, and the encoding is set to ISO-8859-1, aka Latin-1. In the second (non-interactive) command, the database has to be specified explicitly with the <option>-d</option> option, and the default encoding (UTF-8) is assumed.</para> </simplesect> </sect2> @@ -605,4 +631,5 @@ <command>getnote</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-h</arg> <group choice="opt" rep="norepeat"> @@ -626,13 +653,13 @@ <para>The <option>-c</option> switch allows to specify a shell <command>command</command>. 
&appname;c will open a pipe to the first program in the command and send the output to this program's stdin. The command may be any valid command that you can run in your shell, so further plumbing is perfectly legal. This command is handy if you want to search potentially long fields like the content for certain strings. Searching all abstracts of a database with a normal query is slow. It is usually faster to narrow down the search using other fields as far as possible without including the content field and then use grep to find what you want.</para> <para>Use the <option>-d</option> option to specify the database that you want to work with.</para> + <para>The retrieved data will use the character encoding of the database unless you request a different encoding with the <option>-E</option> option. All encodings supported by your local libiconv installation may be specified here.</para> <para>The <option>-h</option> option displays a short command syntax and description, then returns to the command prompt. </para> <para>The <option>-o</option> and <option>-O</option> switches allow to redirect the output to <filename>outfile</filename> instead of the default screen display. The two options differ in the way they handle an existing <filename>outfile</filename>. <option>-o</option> will replace the existing file, while <option>-O</option> will append to the existing file. If <filename>outfile</filename> cannot be opened with the proper permissions, the output is sent to stdout instead.</para> <caution> - <para>Depending on your query, the getref command can generate an enormous amount of output. If you view the output with a pager, the client-server communication will stall as soon as the pager accepts no new data. If the connection times out, your query results will be incomplete. 
It is strongly recommended to redirect all queries which return a lot of references (rule of thumb: more than 100 for screen output, more than 50 for other output) to a file or to a pipe that can handle the amount of data.</para> + <para>Depending on your query, the getnote command can generate an enormous amount of output. If you view the output with a pager, the client-server communication will stall as soon as the pager accepts no new data. If the connection times out, your query results will be incomplete. It is strongly recommended to redirect all queries which return a lot of notes (rule of thumb: more than 100 for screen output, more than 50 for other output) to a file or to a pipe that can handle the amount of data.</para> </caution> - <para>Except for RIS and risx output which always display the full dataset, the <option>-s</option> switch allows to specify additional fields (N1, N2/AB, NX, RP, SN, AD, CY, PB, UR, U1 through U5, M1 through M3) that are not displayed by default. Use "ALL" as an argument to display all available fields. If several fields are specified, the argument has to be enclosed by single quotation marks. If applied to RIS output, you can specify <wordasword>ID</wordasword> as <replaceable>format-string</replaceable> to get only a list of ID values in RIS format for all references that match the search. This is a convenient way to generate ID lists for later operations like <link linkend="app-c-command-deleteref"><command>deleteref</command></link>.</para> <para>The <option>-S</option> switch is used to sort the output. Currently you can sort only by <wordasword>ID</wordasword> (the default) or by <wordasword>PY</wordasword> (publication year).</para> - <para>The <option>-P</option> switch limits the search to the files which are in the current user's personal reference list. 
If this switch is absent, the whole database will be searched.</para> - <para>The <option>-t</option> switch determines the <link linkend="sect-output-formats">type of output</link>. The default value for <replaceable>output-format</replaceable> is <wordasword>scrn</wordasword> (screen output), other possible values are <wordasword>db31</wordasword> (DocBook SGML V. 3.1), <wordasword>db31x</wordasword> (DocBook XML), <wordasword>ris</wordasword> (RIS as of Reference Manager 8.01), <wordasword>risx</wordasword> (XML according to the <link linkend="sect1-writing-risx">risx DTD</link>), <wordasword>html</wordasword> (HTML), <wordasword>xhtml</wordasword> (XHTML), and <wordasword>bibtex</wordasword> (BibTeX).</para> + <para>The <option>-P</option> switch limits the search to the notes which were added by the current user. If this switch is absent, the whole database will be searched.</para> + <para>The <option>-t</option> switch determines the <link linkend="sect-output-formats">type of output</link>. 
The default value for <replaceable>output-format</replaceable> is <wordasword>scrn</wordasword> (screen output), other possible values are <wordasword>xnote</wordasword> (XML according to the xnote DTD), <wordasword>html</wordasword> (HTML), and <wordasword>xhtml</wordasword>.</para> <para>The <option>-f</option> switch reads the search string from <filename>file</filename> instead of from the command line, thus allowing to save searches which will be run repeatedly.</para> <para>The syntax of the queries is described in the section <link linkend="sect1-query-language">query language</link>.</para> @@ -640,6 +667,6 @@ <simplesect> <title>Example</title> - <screen width="60" format="linespecific"><prompt>&appname;c: </prompt><userinput>getref -t db31 -o temp.sgml ":AU:='& ^Doe ^Jones' AND :KW:=circular\ dichroism"</userinput></screen> - <para>This command retrieves articles with both an author starting with <quote>Doe</quote> and an author starting with <quote>Jones</quote> that have the keyword <quote>circular dichroism</quote>. The output will be saved as DocBook SGML into the file <filename moreinfo="none">temp.sgml</filename>.</para> + <screen width="60" format="linespecific"><prompt>&appname;c: </prompt><userinput>getnote -t xnote :CK:=Miller1999</userinput></screen> + <para>This command retrieves notes which are attached to the reference with the citation key "Miller1999" and displays them in the xnote format.</para> </simplesect> </sect2> @@ -651,4 +678,5 @@ <command>getref</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-h</arg> <group choice="opt" rep="norepeat"> @@ -672,4 +700,5 @@ <para>The <option>-c</option> switch allows to specify a shell <command>command</command>. &appname;c will open a pipe to the first program in the command and send the output to this program's stdin. The command may be any valid command that you can run in your shell, so further plumbing is perfectly legal. 
This command is handy if you want to search potentially long fields like the abstracts for certain strings. Searching all abstracts of a database with a normal query is slow. It is usually faster to narrow down the search using other fields as far as possible without including the N2 field and then use grep to find what you want.</para> <para>Use the <option>-d</option> option to specify the database that you want to work with.</para> + <para>The retrieved data will use the character encoding of the database unless you request a different encoding with the <option>-E</option> option. All encodings supported by your local libiconv installation may be specified here. See <command moreinfo="none">man iconv_open</command> for a list of available encodings.</para> <para>The <option>-h</option> option displays a short command syntax and description, then returns to the command prompt. </para> <para>The <option>-o</option> and <option>-O</option> switches allow to redirect the output to <filename>outfile</filename> instead of the default screen display. The two options differ in the way they handle an existing <filename>outfile</filename>. <option>-o</option> will replace the existing file, while <option>-O</option> will append to the existing file. If <filename>outfile</filename> cannot be opened with the proper permissions, the output is sent to stdout instead.</para> @@ -686,6 +715,6 @@ <simplesect> <title>Example</title> - <screen width="60" format="linespecific"><prompt>&appname;c: </prompt><userinput>getref -t db31 -o temp.sgml ":AU:='& ^Doe ^Jones' AND :KW:=circular\ dichroism"</userinput></screen> - <para>This command retrieves articles with both an author starting with <quote>Doe</quote> and an author starting with <quote>Jones</quote> that have the keyword <quote>circular dichroism</quote>. 
The output will be saved as DocBook SGML into the file <filename moreinfo="none">temp.sgml</filename>.</para> + <screen width="60" format="linespecific"><prompt>&appname;c: </prompt><userinput>getref -t ris -o temp.sgml -E ISO-8859-15 ":AU:='& ^Doe ^Jones' AND :KW:=circular\ dichroism"</userinput></screen> + <para>This command retrieves articles with both an author starting with <quote>Doe</quote> and an author starting with <quote>Jones</quote> that have the keyword <quote>circular dichroism</quote>. The output will be saved in RIS format to the file <filename moreinfo="none">temp.sgml</filename> using the character encoding ISO-8859-15.</para> </simplesect> </sect2> @@ -849,4 +878,5 @@ <command>updatenote</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-h</arg> <group choice="opt" rep="norepeat"> @@ -880,4 +910,5 @@ <command>updateref</command> <arg>-d <replaceable>database</replaceable></arg> + <arg>-E <replaceable>encoding</replaceable></arg> <arg>-g <replaceable>deffile</replaceable></arg> <arg>-h</arg> @@ -900,5 +931,5 @@ <para>Updates the references in RIS format in <replaceable>file</replaceable> in the current database.</para> <para>This command is essentially the same as <link linkend="app-c-command-addref">addref</link>, but it uses the <wordasword>ID</wordasword> fields in the input data to update existing references with the same ID. If the ID of a reference is not existent in the database, a new entry is created, ignoring the ID specified in the RIS or risx file. Currently &appname; does not check whether the new dataset has any similarity with the old one having the same ID. 
If you tell &appname; to update a reference, it uses whatever you send to this end.</para> - <para>For the <option>-c</option>, <option>-g</option>, <option>-h</option>, <option>-o</option>, <option>-O</option>, <option>-U</option>, and <option>-f</option> options, please refer to the description of the <link linkend="app-c-command-addref"><command moreinfo="none">addref</command></link> command.</para> + <para>For the <option>-c</option>, <option>-E</option>, <option>-g</option>, <option>-h</option>, <option>-o</option>, <option>-O</option>, <option>-U</option>, and <option>-f</option> options, please refer to the description of the <link linkend="app-c-command-addref"><command moreinfo="none">addref</command></link> command.</para> <para>Use the <option>-P</option> switch to update only the personal information for this reference, i.e. the N1 (notes), RP (reprint status), and AV (availability) fields. This will automatically add the reference to your personal reference list. All other fields will be ignored. Combine this option with the <option>-g</option> option e.g. to quickly change the reprint status of existing references to <quote>IN FILE</quote> from <quote>NOT IN FILE</quote> or from <quote>ON REQUEST</quote>.</para> </simplesect> @@ -953,4 +984,5 @@ Number of notes: 2 Highest note ID: 2 +Encoding: ISO-8859-1 Database type: risx Server type: pgsql @@ -1076,5 +1108,5 @@ <title id="sect1.title-query-language">The query language</title> <para>The <link linkend="app-c-command-getref"><command>getref</command></link> command is probably the most heavily used command. You use it to retrieve the references that you collected and saved in the database. To find a certain article or several related articles, all you have to do is to express your query in a language that &appname; understands. 
The first section describes how to formulate search strings for your queries.</para> - <para>The <link linkend="app-c-command-getnote"><command>getnote</command></link> command used to locate extended notes is very similar. The specifics of this command will be described in the following section.</para> + <para>The <link linkend="app-c-command-getnote"><command>getnote</command></link> command used to locate extended notes is very similar. The specifics of this command will be described in the subsequent section.</para> <sect2 id="sect-description-reference-query-language"> <title>The reference query language</title> @@ -1087,8 +1119,8 @@ <para>Every search item has the following general form:</para> <informalexample> - <para>:XY:[=|!=|<|>]<replaceable>string</replaceable></para> + <para>:XY:[=|~|!=|!~|<|>]<replaceable>string</replaceable></para> </informalexample> <warning> - <para>The current implementation of &appname; is very picky about spaces. Please make sure that you do not insert spaces or other whitespace on either side of the operators ("=", "!=", "<", ">"). If your value should start with a space, include the value in quotation marks or protect the space with a backslash.</para> + <para>The current implementation of &appname; is very picky about spaces. Please make sure that you do not insert spaces or other whitespace on either side of the operators ("=", "~", "!=", "!~", "<", ">"). If your value should start with a space, include the value in quotation marks or protect the space with a backslash.</para> </warning> <para>The sequence ":XY:" denotes the reference data field to search in. The names are mostly taken from the RIS specification. Possible field names are:</para> @@ -1263,7 +1295,9 @@ <itemizedlist> <listitem> - <para>The alphanumerical fields are matched by (non-)equality to a regular expression. Only the operators "=" and "!=" are accepted, denoting equality and non-equality, respectively. 
The <replaceable>search-string</replaceable> can contain any legal characters and constructs as in standard Unix regular expressions. By default, the query matches if the search string is contained anywhere in the target string. If you need a left-match, a right-match, or a full match, use the regexp special characters "^" (match the beginning of a line) and "$" (match the end of a line) to your needs. For further information about regular expressions, see the section <link linkend="sect1-regular-expressions">regular expressions</link></para> + <para>The alphanumerical fields are matched by (non-)equality to a literal string or to a regular expression. For literal matches the operators "=" and "!=" are accepted, denoting equality and non-equality, respectively. The <replaceable>search-string</replaceable> is a plain-text string.</para> + <para>Along the same lines, "~" and "!~" denote equality and non-equality for regular expression matches. The <replaceable>search-string</replaceable> can contain any legal characters and constructs as in standard Unix regular expressions. By default, the query matches if the search string is contained anywhere in the target string. If you need a left-match, a right-match, or a full match, use the regexp special characters "^" (match the beginning of a line) and "$" (match the end of a line) to your needs. For further information about regular expressions, see the section <link linkend="sect1-regular-expressions">regular expressions</link></para> <note> <para>Some database engines, like SQLite, do not support Unix-style regular expressions. Use SQL regular expressions instead.</para> + <para>If you use regular expressions, be aware that you will have to escape characters with a special meaning if you want them to be matched literally. 
For further details, see the <link linkend="sect2-query-examples">examples</link> below.</para> </note> </listitem> @@ -1368,5 +1402,5 @@ <para>This section shows a few example queries to help you get familiar with the syntax. If you are not familiar with the regular expressions used here, please peruse the <link linkend="sect1-regular-expressions">regular expressions section</link>. We will not use any of the fancy switches of the <link linkend="app-c-command-getref"><command>getref</command></link> command here, so the output will always be a simple listing on the screen.</para> <note> - <para>These examples assume that your database engine performs partial matches by default. This holds true for MySQL and PostgreSQL, wherease SQLite always attempts a full match. To emulate partial matches with the latter, append a percent sign (%) after each string to match. See also the section about <link linkend="sect2-regular-expressions-sql">SQL regular expressions</link>.</para> + <para>These examples assume that your database engine supports Unix regular expressions. This holds true for MySQL and PostgreSQL, whereas SQLite uses the simpler SQL regular expressions instead.</para> </note> <para>We'll start with some easy queries. First we want to display a reference with a specific ID (25 in this example):</para> @@ -1374,29 +1408,29 @@ <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :ID:=25</userinput></screen> </informalexample> - <para>Next we want to list all references by a specific author. We'll use only the last name here. If several authors share this last name, we have to specify the initials as well, as shown in the second example. Note the use of the caret "^" which makes sure that the name actually starts with the capital M. Otherwise, a last name like "DeMillerette" would match as well.</para> + <para>Next we want to list all references by a specific author. We'll use only the last name here. 
If several authors share this last name, we have to specify the initials as well, as shown in the second example. In the first example we use a regular expression match, denoted by the tilde operator. This obviates the need to know the full name precisely. The second example uses a literal match instead. Note the use of the caret "^" in the first example which makes sure that the name actually starts with the capital M. Otherwise, a last name like "DeMillerette" would match as well. This trick is not required in the second example as the literal match always implies a full match.</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller</userinput></screen> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller,J.D.</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:~^Miller</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=Miller,J.D.</userinput></screen> </informalexample> <para>If Dr. Miller was a productive person, our previous query may have returned dozens of references. Now we try to filter out the paper or the papers that we really need. 
In the next example, we restrict the results to the years 1995 through 1999:</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller AND :PY:>1994 AND :PY:<2000</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:~^Miller AND :PY:>1994 AND :PY:<2000</userinput></screen> </informalexample> <para>If this did not bring us close enough, we may try to include a coauthor:</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller AND :AU:=^Doe AND :PY:>1994 AND :PY:<2000</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:~^Miller AND :AU:~^Doe AND :PY:>1994 AND :PY:<2000</userinput></screen> </informalexample> <para>At this point we could narrow down the search by excluding other authors that often published with Dr. Miller, but are irrelevant here:</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller AND :AU:=^Doe AND NOT (:AU:=^Jones) AND :PY:>1994 AND :PY:<2000</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:~^Miller AND :AU:~^Doe AND NOT (:AU:~^Jones) AND :PY:>1994 AND :PY:<2000</userinput></screen> </informalexample> <para>Unfortunately, this is still a venerable list of publications. Now we try to include a few keywords. This is now a pretty complex query. It will return all references by the authors Miller and Doe between 1995 and 1999 with either the keyword "blood" or the keyword "animal" or the keywords "guanyl" and "cyclase", the latter only if both are present. 
The truncated spelling of "guanyl" ensures that both "guanylyl" and "guanylate" (which are interchangeable) will match. The funny expressions with the angle brackets ensure that the keywords will match regardless of whether they start with a capital letter or not.</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:=^Miller AND :AU:=^Doe AND :PY:>1994 AND :PY:<2000 AND -(:KW:=[bB]lood OR :KW:=[aA]nimal OR (:KW:=[gG]uanyl AND :KW:=[cC]yclase))</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :AU:~^Miller AND :AU:~^Doe AND :PY:>1994 AND :PY:<2000 AND +(:KW:~[bB]lood OR :KW:~[aA]nimal OR (:KW:~[gG]uanyl AND :KW:~[cC]yclase))</userinput></screen> </informalexample> - <para>And now for something completely different. If you've added a couple extended notes to your database, you can retrieve references that are attached to a specific extended note, e.g. to the note with the citation key "Miller1999":</para> + <para>And now for something completely different. If you've added a couple extended notes to your database, you can retrieve references that are attached to a specific extended note, e.g. 
to the note with the citation key "biochemistry1999":</para> <informalexample> - <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getnote :NCK:=biochemistry1999</userinput></screen> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :NCK:=biochemistry1999</userinput></screen> </informalexample> <para>If you want to see all notes which are attached to a reference with the citation key "Miller1999", use the following command:</para> @@ -1404,4 +1438,16 @@ <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getnote :CK:=Miller1999</userinput></screen> </informalexample> + <para>Regular expressions may have unwanted side effects at times. Consider the keyword "52-67-5 (Penicillamine)" (a chemical name as used by the <ulink url="http://ncbi.nlm.nih.gov">Pubmed</ulink> database). Doing a literal match is straightforward:</para> + <informalexample> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :KW:='52-67-5 (Penicillamine)'</userinput></screen> + </informalexample> + <para>However, if we use the same argument for a regexp match, we won't get the desired results. The parentheses have a special meaning in regular expressions. Therefore we have to escape them if we want a literal match:</para> + <informalexample> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :KW:~'\(Penicillamine\)'</userinput></screen> + </informalexample> + <para>Things are a little different again if you run a database engine that does not use Unix regular expressions, but SQL regular expressions instead. 
These know only '%' and '_' as special characters, and you have to escape them by doubling:</para> + <informalexample> + <screen format="linespecific"><prompt moreinfo="none">refdbc: </prompt><userinput moreinfo="none">getref :KW:~'100%%'</userinput></screen> + </informalexample> <tip> <para>Remember that if you extend or modify a previous query, you don't have to retype everything: Just use the <keycap>up arrow key</keycap> to scroll through the previous commands, or use <keycombo moreinfo="none"> @@ -1418,5 +1464,5 @@ <para>Some database engines like SQLite do not support Unix-style regular expressions. You have to use SQL regular expressions in this case.</para> </note> - <para>The difference between a search and a regular expression search is that the latter allows some <quote>fuzziness</quote> in the search string. The former requires that the search string and the search result match character by character. In simple words, regular expressions allow to search for strings which are similar to some extent, and you can exactly specify to which extent.</para> + <para>The difference between a literal match and a regular expression match is that the latter allows some <quote>fuzziness</quote> in the search string. The former requires that the search string and the search result match character by character. 
In simple words, regular expressions allow to search for strings which are similar to some extent, and you can exactly specify to which extent.</para> <sect2 id="sect2-regular-expressions-unix"> <title>Unix-style regular expressions</title> @@ -1529,4 +1575,5 @@ </varlistentry> </variablelist> + <para>In order to match a SQL regular expression special character literally, you have to escape it by doubling.</para> </sect2> </sect1> Index: refdb-manual-chapter11.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter11.sgml,v retrieving revision 1.12 retrieving revision 1.13 diff -u -U2 -r1.12 -r1.13 --- refdb-manual-chapter11.sgml 30 Dec 2003 23:51:37 -0000 1.12 +++ refdb-manual-chapter11.sgml 8 Feb 2004 00:32:45 -0000 1.13 @@ -66,4 +66,8 @@ </sect2> <sect2> + <title>Character encodings</title> + <para>The RIS specification has no built-in means to specify the character encoding of the data. Commercial applications apparently expect the data to be encoded as ISO-8859-1, aka Latin-1. &appname; does not have this limitation, you are free to use any encoding available on your platform (except UTF-16 and UTF-32). However, you should be aware that this may cause an interchange issue if you plan to use these data in a commercial reference management program. In any case, as the datasets do not specify their encoding, you have to use the <option>-E</option> option of the <link linkend="app-c-command-getref">getref</link> command if your input data use an encoding different from the default (ISO-8859-1).</para> + </sect2> + <sect2> <title>RIS tags</title> <para>The following list shows all available tags and their use.</para> @@ -229,5 +233,5 @@ <listitem> <para>Synonym: A1. This is the name of one author of the reference. If a reference has multiple authors, each author is specified with an AU tag on a separate line. The number of authors per RIS dataset is not limited. 
The sequence of the authors in the authorlist will be determined from the sequence as they appear in the RIS dataset.</para> - <para><emphasis>Format:</emphasis> A string with up to 255 characters in the form: Lastname[,(F.|First)[(M.|Middle)[,Suffix]]]. First and middle names can either be abbreviated or spelled out. Some examples for valid entries:</para> + <para><emphasis>Format:</emphasis> A string with up to 255 characters in the form: Lastname[,(F.|First)[(M.|Middle)...][,Suffix]]. First and middle names can either be abbreviated or spelled out. Use periods to separate initials, and spaces to separate spelled-out first or middle names. Lastname can be a corporate name. Some examples for valid entries:</para> <itemizedlist> <listitem> @@ -568,6 +572,6 @@ <para>Either copy <filename>ris.el</filename> into a directory which is in your load-path (<filename class="directory" moreinfo="none">/usr/local/share/emacs/site-lisp</filename> is a common place for such files), or include <filename class="directory" moreinfo="none">/usr/local/share/&appname;/site-lisp</filename> in your load-path.</para> </step> - <step performance="required"> - <para>If you're a speed fanatic, byte-compile <filename moreinfo="none">ris.el</filename> with the Emacs command <command>M-x byte-compile-file <filename>path/to/ris.el</filename></command>.</para> + <step performance="optional"> + <para>If you're a speed fanatic, byte-compile <filename moreinfo="none">ris.el</filename> with the Emacs command <command>M-x byte-compile-file <filename>path/to/ris.el</filename></command>. You won't notice a difference on systems later than a 486, though.</para> </step> <step performance="required"> @@ -631,5 +635,5 @@ </varlistentry> </variablelist> - <para>If you need other formats than those listed above, you'll either have to provide your own input filter or search the web for existing filters that convert your data to one of the supported formats. A good resource is e.g. 
<ulink url="http://www.ecst.csuchico.edu/~jacobsd/bib/">Dana Jacob's</ulink> pages about bibliography software.</para> + <para>If you need other formats than those listed above, you'll either have to provide your own input filter or search the web for existing filters that convert your data to one of the supported formats. A decent set of filters is supplied by Chris Putnam's <ulink url="http://www.scripps.edu/~cdputnam/software/bibutils/bibutils.html">bibutils</ulink> package. Another good resource is e.g. <ulink url="http://www.ecst.csuchico.edu/~jacobsd/bib/">Dana Jacob's</ulink> pages about bibliography software.</para> </sect1> <sect1 id="sect1-writing-risx"> @@ -637,4 +641,7 @@ <para>XML documents using the <ulink url="http://refdb.sourceforge.net/dtd/risx/risx.dtd">risx DTD</ulink> are an alternative way to add datasets to &appname; databases. You can use your favourite SGML/XML editor to edit these datasets. You can also use DSSSL or XSLT scripts to transform bibliographic data available as SGML or XML documents to risx.</para> <para>This section provides a quick outline of risx datasets. For a description of all available elements and their relationships, please visit the <ulink url="http://refdb.sourceforge.net/risx/book1.html">risx documentation</ulink>.</para> + <para>As usual, start the document with the processing instructions, followed by the document type declaration. Make sure to include the character encoding if it is different from the default (UTF-8). The other encodings supported by &appname; are UTF-16, ISO-8859-1, and US-ASCII. The first line might then read:</para> + <programlisting format="linespecific"><?xml version="1.0" encoding="utf-8"?> +</programlisting> <para>The top-level element of a risx XML document is either <sgmltag>ris</sgmltag> (if the file provides multiple datasets) or <sgmltag>entry</sgmltag>, which corresponds to a single dataset. The <sgmltag>ris</sgmltag> element holds one or more <sgmltag>entry</sgmltag> elements. 
The <sgmltag class="attribute">type</sgmltag> attribute specifies the type of the reference. These are the same types as described above for the RIS <link linkend="ris-typetag">TY tag</link>. The <sgmltag class="attribute">id</sgmltag> and <sgmltag class="attribute">citekey</sgmltag> attributes specify a numeric ID (which will only be used if you update references) and a citation key, respectively. The latter should be all uppercase if you intend to use the references with SGML documents.</para> <para>Each <sgmltag>entry</sgmltag> element contains up to five subelements, the first three of which provide the bibliographic information proper. risx distinguishes three levels of bibliographic information. Each <sgmltag>entry</sgmltag> can specify one or more of these levels:</para> @@ -673,5 +680,8 @@ </itemizedlist> <para>Searching for notes is similar to searching for references. Notes may have keywords, keys, and a title attached to them to easily find them. In addition, you can search for notes that link to a particular reference, author, keyword, or periodical. The inverse works as well: you can search for references that are linked to particular notes.</para> - <para>Extended notes are XML documents according to the <ulink url="http://refdb.sourceforge.net/dtd/xnote/xnote.dtd">xnote DTD</ulink>. The structure of these documents is simple enough to do without a separate documentation. If you want to write several extended notes in a file, start with an <sgmltag class="element">xnoteset</sgmltag> element. Each individual extended note is kept in an <sgmltag class="element">xnote</sgmltag> element. This element carries up to four optional attributes:</para> + <para>Extended notes are XML documents according to the <ulink url="http://refdb.sourceforge.net/dtd/xnote/xnote.dtd">xnote DTD</ulink>. The structure of these documents is simple enough to do without a separate documentation. 
As usual, start the document with the processing instructions, followed by the document type declaration. Make sure to include the character encoding if it is different from the default (UTF-8). The other encodings supported by &appname; are UTF-16, ISO-8859-1, and US-ASCII. The first line might then read:</para> + <programlisting format="linespecific"><?xml version="1.0" encoding="utf-8"?> +</programlisting> +<para>If you want to write several extended notes in a file, start with an <sgmltag class="element">xnoteset</sgmltag> element. Each individual extended note is kept in an <sgmltag class="element">xnote</sgmltag> element. This element carries up to four optional attributes:</para> <variablelist> <varlistentry> Index: refdb-manual-chapter12.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter12.sgml,v retrieving revision 1.10 retrieving revision 1.11 diff -u -U2 -r1.10 -r1.11 --- refdb-manual-chapter12.sgml 30 Dec 2003 23:51:37 -0000 1.10 +++ refdb-manual-chapter12.sgml 8 Feb 2004 00:32:45 -0000 1.11 @@ -173,4 +173,56 @@ </itemizedlist> </sect1> + <sect1 id="sect1-character-encoding"> + <title>Character encoding issues</title> + <para>The 7-bit ASCII character set originally employed by PC computers in the days of yore turned out to be insufficient for languages other than English. Reference data may require characters not included in the ASCII character set. The string sorting order may also follow different rules. &appname; supports national character sets as well as Unicode, which is sort of a superset of all national character sets. 
As a &appname; user and administrator you'll have to deal with character encoding issues at different levels.</para> + <sect2> + <title>Character encodings of databases</title> + <para>While it is possible to convert the data during import and export (see the following sections), it is still worthwile to spend a few thoughts about the character encoding used by your reference databases. If possible, use an encoding that ensures a suitable string sorting order for your data. Choosing a proper encoding also avoids unnecessary character encoding conversions when importing or exporting data.</para> + <para>The available encodings are limited by your database engine:</para> + <variablelist> + <varlistentry> + <term>SQLite</term> + <listitem> + <para>SQLite currently supports only ISO-8859-1 (the default) and UTF-8 as a compile-time option. If you install a binary package, it most likely uses ISO-8859-1.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>MySQL</term> + <listitem> + <para>This database engine supports a fairly large number of encodings, but versions prior to 4.1 allow only one encoding per server instance. That is, all databases have to use the same character encoding. Please see the <ulink url="http://www.mysql.org">MySQL documentation</ulink> for the growing list of supported encodings</para> + </listitem> + </varlistentry> + <varlistentry> + <term>PostgreSQL</term> + <listitem> + <para>This database engine supports a variety of encodings as a per-database option. That is, all reference databases may use different encodings. 
Please see the <ulink url="http://www.postgresql.org">PostgreSQL documentation</ulink> for a current list of supported encodings.</para> + </listitem> + </varlistentry> + </variablelist> + </sect2> + <sect2> + <title>Character encodings of imported data</title> + <para>We'll have to distinguish two different sorts of data:</para> + <variablelist> + <varlistentry> + <term>RIS</term> + <listitem> + <para>This plain-text format does not have a built-in way to declare the character encoding of the data. Instead you have to use the <option>-E</option> option of the <link linkend="app-c-command-addref">addref</link> and <link linkend="app-c-command-updateref">updateref</link> commands to specify the encoding if it is different from the default (ISO-8859-1).</para> + <para>Please note that the import filters <link linkend="sect-medtorispl">med2ris.pl</link>, <link linkend="sect-entorispl">en2ris.pl</link>, and to a limited extent also <link linkend="sect-marctoris">marc2ris.pl</link> support on-the-fly character encoding conversion.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>risx and xnote</term> + <listitem> + <para>These are XML formats that can use the XML way of declaring the encoding. This is done in the processing instructions, which is the first line in a XML file. Due to a limitation of the parser used for importing XML data, only four encodings are accepted by &appname;: UTF-8, UTF-16, ISO-8859-1, US-ASCII. If your data use a different encoding, use the <command moreinfo="none">iconv</command> command line utility (usually a part of the libiconv package) to convert your data to one of the accepted encodings.</para> + </listitem> + </varlistentry> + </variablelist> + </sect2> + <sect2> + <title>Character encodings of exported data</title> + <para>By default, data are exported without a character conversion, i.e. the data will use whatever encoding the database uses. 
If you want the exported data in a different format, request the encoding with the <option>-E</option> option. This option is accepted by the <link linkend="app-c-command-getref">getref</link> and <link linkend="app-c-command-getnote">getnote</link> commands of &appname;c as well as by the <link linkend="chapter-refdbib">&appname;ib</link> client. You may request any encoding that your local libiconv installation supports. <command moreinfo="none">man 3 iconv</command> or <command moreinfo="none">man iconv_open</command> should give a clue which encodings are available.</para> + </sect2> + </sect1> <sect1 id="sect1-pdfroot"> <title>Use pdfroot</title> Index: refdb-manual-chapter13.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter13.sgml,v retrieving revision 1.14 retrieving revision 1.15 diff -u -U2 -r1.14 -r1.15 --- refdb-manual-chapter13.sgml 30 Dec 2003 23:51:37 -0000 1.14 +++ refdb-manual-chapter13.sgml 8 Feb 2004 00:32:45 -0000 1.15 @@ -22,4 +22,5 @@ <arg choice="opt">-D <replaceable>stylespec-directory</replaceable></arg> <arg choice="opt">-e <replaceable>log destination</replaceable></arg> + <arg choice="opt">-E <replaceable>encoding</replaceable></arg> <arg choice="opt" rep="norepeat">-f <replaceable>stdin</replaceable></arg> <arg>-h</arg> @@ -44,4 +45,5 @@ <para>Use the <option>-d</option> option to specify the database that you want to work with. This will be the default database for all references unless the input file explicitly states the database for some or all citations.</para> <para>&appname;ib will create a style specification file for the subsequent transformation of your document with each run, unless you use the <option>-n</option> switch. These files will be stored in the directory that you specify with the <option>-D</option> option. Specify either a full path or "." to use the current working directory. 
The latter case is what you usually want if you run &appname;ib from the directory where your LaTeX or SMGL/XML document is stored. This is also the default if you do not specify a directory at all.</para> + <para>Use the <option>-E</option> option to select a useful output character encoding. If this option is not used, the bibliography data will use the character encoding of the database. See <command moreinfo="none">man iconv_open</command> for a list of available encodings.</para> <para>The <option>-f stdin</option> option is a crutch to make reading data from stdin possible on platforms that do not allow automatic detection of data on stdin, like Windows/Cygwin. On other platforms, &appname;ib automatically reads data from stdin if data are available.</para> <para>The <option>-n</option> option instructs &appname;ib not to create style specification files. Use this option if you are sure that a current style specification file already exists. This saves about as much computing time as you need to key in this option.</para> @@ -50,5 +52,5 @@ <para>The <option>-t</option> option selects the type of output. Use "db31" to generate DocBook SGML bibliographies, "db31x" for DocBook XML bibliographies, "teix" for TEI XML bibliographies, and "bibtex" for BibTeX bibliographies. The type of output also determines the type of style specification file, if any, that will be generated in addition to the bibliography for formatting purposes. This is only a matter of concern if you want to process a DocBook XML document with the DSSSL stylesheets: In this case you should use "db31" with this option. 
The SGML bibliography element is also a valid XML element, but you will get a DSSSL driver file instead of a XSL driver file when you use "db31x".</para> <note> - <para>In the current implementation, the <option>-t teix</option> option will also return a DocBook bibliography which needs to be transformed to a TEI bibliography with the <link linkend="sect-bibdb2tei"><filename>bibdb2tei.dsl</filename></link> DSSSL stylesheet.</para> + <para>In the current implementation, the <option>-t teix</option> option will also return a DocBook bibliography which needs to be transformed to a TEI bibliography with the <link linkend="sect-bibdb2tei"><filename>bibdb2tei.xsl</filename></link> DSSSL stylesheet.</para> </note> <para>The purpose of all other command-line switches is explained in the section <link linkend="sect1-common-command-line-options">common command-line options</link>.</para> @@ -166,4 +168,9 @@ <entry>The number where the reference numbering starts at. This option is mostly useful for compiling advanced bibliographies or for C boneheads who insist that counting starts at zero.</entry> </row> + <row> + <entry>encoding</entry> + <entry>(the database encoding)</entry> + <entry>The character encoding for the bibliography output. If this is not specified, the data will use the same encoding as the database.</entry> + </row> </tbody> </tgroup> Index: refdb-manual-chapter14.sgml =================================================================== RCS file: /cvsroot/refdb/refdb/doc/refdb-manual-chapter14.sgml,v retrieving revision 1.8 retrieving revision 1.9 diff -u -U2 -r1.8 -r1.9 --- refdb-manual-chapter14.sgml 30 Dec 2003 23:51:37 -0000 1.8 +++ refdb-manual-chapter14.sgml 8 Feb 2004 00:32:45 -0000 1.9 @@ -12,4 +12,5 @@ <title>The &appname;nd shell script</title> <para>This script should be the first choice for novices to create new SGML or XML documents for use with &appname;. 
If called without arguments, the script runs in an interactive mode and collects a few answers about the new document. Based on these answers it will create a skeleton document and a custom-tailored Makefile that performs all necessary steps to create formatted output from the document.</para> + <para>Alternatively you can call this script from a directory that contains an existing SGML or XML file. Pass the full name to the script when it asks for a filename, and the script will try to guess some of the settings from the existing file.</para> <para>The script can create the following document types:</para> <itemizedlist> @@ -223,5 +224,5 @@ <term>-s <replaceable>stylesheet</replaceable></term> <listitem> - <para>This selects the stylesheet driver file. This file is generated by &appname;bib (which in turn is called by runbib) and contains additional formatting information.</para> + <para>This selects the stylesheet driver file. This file is generated by &appname;ib (which in turn is called by runbib) and contains additional formatting information.</para> </listitem> </varlistentry> @@ -295,8 +296,8 @@ </sect1> <sect1> - <title id="sect-bibdb2tei">The bibdb2tei.dsl stylesheet</title> - <para>This DSSSL stylesheet transforms a DocBook bibliography as g... [truncated message content] |
From: Markus H. <mho...@us...> - 2004-02-07 14:58:54
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3011 Modified Files: strfncs.c Log Message: added a couple of NULL checks Index: strfncs.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/strfncs.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -U2 -r1.13 -r1.14 --- strfncs.c 29 Jun 2003 23:09:01 -0000 1.13 +++ strfncs.c 7 Feb 2004 14:56:01 -0000 1.14 @@ -133,4 +133,8 @@ struct hostent *hostinfo; + if (server_ip == NULL) { + return 1; + } + if (is_ip(server_ip)) { return 0; /* looks like a dotted quad */ @@ -184,4 +188,8 @@ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ int is_port(char *port) { + if (port == NULL) { + return 0; + } + if (atoi(port) < 1024) { return 0; @@ -240,4 +248,8 @@ register char *s, *t; + if (string == NULL) { + return NULL; + } + s = string; @@ -288,4 +300,8 @@ char* chr; + if (string == NULL) { + return NULL; + } + chr = string; /* don't modify string, we need it as a return value */ @@ -309,4 +325,8 @@ char* chr; + if (string == NULL) { + return NULL; + } + chr = string; /* don't modify string, we need it as a return value */ |
From: Markus H. <mho...@us...> - 2004-02-07 14:57:35
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2797 Modified Files: refdbdref.c Log Message: changed return message for added/updated refs Index: refdbdref.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdref.c,v retrieving revision 1.62 retrieving revision 1.63 diff -u -U2 -r1.62 -r1.63 --- refdbdref.c 7 Feb 2004 14:51:44 -0000 1.62 +++ refdbdref.c 7 Feb 2004 14:54:40 -0000 1.63 @@ -698,5 +698,5 @@ else if (result == 2) { ptr_addresult->updated++; - sprintf(return_msg, "Updating set %d successful\n", set_count + nongeek_offset); + sprintf(return_msg, "Updating input set %d successful\n", set_count + nongeek_offset); LOG_PRINT(LOG_INFO, "dataset updated successfully"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); @@ -709,5 +709,5 @@ else { ptr_addresult->failure++; - sprintf(return_msg, "Processing set %d failed\n", set_count + nongeek_offset); + sprintf(return_msg, "Processing input set %d failed\n", set_count + nongeek_offset); LOG_PRINT(LOG_WARNING, "failed processing dataset"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); |
From: Markus H. <mho...@us...> - 2004-02-07 14:55:41
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2553 Modified Files: risxhandler.c Log Message: added support to count datasets from 0 or 1 Index: risxhandler.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/risxhandler.c,v retrieving revision 1.24 retrieving revision 1.25 diff -u -U2 -r1.24 -r1.25 --- risxhandler.c 25 Jan 2004 19:31:56 -0000 1.24 +++ risxhandler.c 7 Feb 2004 14:52:47 -0000 1.25 @@ -50,4 +50,5 @@ extern int n_log_level; +extern int nongeek_offset; extern struct BSTRING connerr; extern struct BSTRING outomem; @@ -1712,5 +1713,12 @@ else { my_dbi_conn_unlock(ptr_ardata->conn); - sprintf(sql_command, "Adding set "ULLSPEC" successful\n", (unsigned long long)(ptr_ardata->set_count)); + if (ptr_ardata->create_new) { + sprintf(sql_command, "Adding input set "ULLSPEC" successful\n", (unsigned long long)(ptr_ardata->set_count + nongeek_offset)); + (ptr_ardata->added_count)++; + } + else { + sprintf(sql_command, "Updating input set "ULLSPEC" successful\n", (unsigned long long)(ptr_ardata->set_count + nongeek_offset)); + (ptr_ardata->updated_count)++; + } if ((new_msgpool = mstrcat(ptr_ardata->msgpool, sql_command, &(ptr_ardata->msgpool_len), 0)) == NULL) { LOG_PRINT(LOG_WARNING, outomem.text); @@ -1721,11 +1729,5 @@ ptr_ardata->msgpool = new_msgpool; } - /* ToDo: see whether updated or added */ - if (ptr_ardata->create_new) { - (ptr_ardata->added_count)++; - } - else { - (ptr_ardata->updated_count)++; - } + (ptr_ardata->set_count)++; } |
From: Markus H. <mho...@us...> - 2004-02-07 14:54:37
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2326 Modified Files: refdbdref.c Log Message: added support to count datasets from 0 or 1 Index: refdbdref.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdref.c,v retrieving revision 1.61 retrieving revision 1.62 diff -u -U2 -r1.61 -r1.62 --- refdbdref.c 4 Feb 2004 21:19:05 -0000 1.61 +++ refdbdref.c 7 Feb 2004 14:51:44 -0000 1.62 @@ -59,4 +59,5 @@ extern char refdblib[]; /* location of shareable data */ extern int n_log_level; /* numeric version of log_level */ +extern int nongeek_offset; /* :-) */ /* forward declaration of local functions */ @@ -686,5 +687,5 @@ if (result == 1) { ptr_addresult->success++; - sprintf(return_msg, "Adding set %d successful\n", set_count); + sprintf(return_msg, "Adding input set %d successful\n", set_count + nongeek_offset); LOG_PRINT(LOG_INFO, "dataset added successfully"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); @@ -697,5 +698,5 @@ else if (result == 2) { ptr_addresult->updated++; - sprintf(return_msg, "Updating set %d successful\n", set_count); + sprintf(return_msg, "Updating set %d successful\n", set_count + nongeek_offset); LOG_PRINT(LOG_INFO, "dataset updated successfully"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); @@ -708,5 +709,5 @@ else { ptr_addresult->failure++; - sprintf(return_msg, "Processing set %d failed\n", set_count); + sprintf(return_msg, "Processing set %d failed\n", set_count + nongeek_offset); LOG_PRINT(LOG_WARNING, "failed processing dataset"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); |
From: Markus H. <mho...@us...> - 2004-02-07 14:50:23
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1584 Modified Files: refdbd.c Log Message: added config option to count from 0 or 1; always uppercase encoding names to make them case-insensitive Index: refdbd.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbd.c,v retrieving revision 1.72 retrieving revision 1.73 diff -u -U2 -r1.72 -r1.73 --- refdbd.c 31 Jan 2004 14:50:22 -0000 1.72 +++ refdbd.c 7 Feb 2004 14:47:29 -0000 1.73 @@ -62,5 +62,5 @@ #endif -Prefs prefs[21] = { +Prefs prefs[22] = { {"serverip", ""}, {"timeout", ""}, @@ -83,4 +83,5 @@ {"remoteconnect", ""}, {"in_encoding", ""}, + {"nongeek_offset", ""}, {"", ""} }; @@ -112,4 +113,5 @@ char default_input_encoding[PREFS_BUF_LEN] = "ISO-8859-1"; /* default char encoding for input data */ char keyword_scan[PREFS_BUF_LEN] = ""; /* run automatic keyword scan if 't' */ +char ng_offset[PREFS_BUF_LEN] = "1"; /* 0 for geeks, 1 for humans */ char confdir[_POSIX_PATH_MAX+1] = ""; /* path to the config files */ @@ -131,4 +133,5 @@ int gotchldsig = 0; /* set to 1 if SIGCHLD received */ int n_cgi = 0; /* if 1, data are requested for cgi output */ +int nongeek_offset = 1; /* 0 for geeks, 1 for humans */ FILE* fp_log_file = NULL; /* a FILE pointer to a custom log file */ dbi_result dbi_style_res = NULL; /* used by backends to load style information */ @@ -233,4 +236,5 @@ prefs[18].varvalue = remote_connect; prefs[19].varvalue = default_input_encoding; + prefs[20].varvalue = ng_offset; *dbi_driver_dir = '\0'; @@ -368,4 +372,9 @@ n_remote_admin = (*remote_admin == 't') ? 1:0; n_remote_connect = (*remote_connect == 't') ? 
1:0; + nongeek_offset = atoi(ng_offset); + + /* uppercase the encoding names */ + strup(default_db_encoding); + strup(default_input_encoding); if (!strcmp(ptr_clrequest->dbserver, "sqlite") && *ip_or_path) { @@ -803,6 +812,6 @@ /* use one of the following options to debug the child process */ - /* printf("child sleeps PID=%d\n", getpid()); */ - /* sleep(10); */ +/* printf("child sleeps PID=%d\n", getpid()); */ +/* sleep(10); */ /* alternatively you can sleep until */ /* an explicit user action (i.e., kill -CONT or "gdb, c"), */ @@ -971,4 +980,5 @@ strncpy(db_encoding, optarg, PREFS_BUF_LEN); db_encoding[PREFS_BUF_LEN-1] = '\0'; + strup(db_encoding); break; case 'G': @@ -1715,4 +1725,5 @@ if (strncmp(child_inbuffer1, positive.text, positive.length) != 0) { LOG_PRINT(LOG_WARNING, "no positive ack from client"); + LOG_PRINT(LOG_DEBUG, child_inbuffer1); } |
From: Markus H. <mho...@us...> - 2004-02-04 21:21:33
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20879 Modified Files: refdbdbib.c refdbdnote.c refdbdref.c Log Message: increase output buffer size to six times the input size Index: refdbdbib.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdbib.c,v retrieving revision 1.34 retrieving revision 1.35 diff -u -U2 -r1.34 -r1.35 --- refdbdbib.c 31 Jan 2004 14:50:22 -0000 1.34 +++ refdbdbib.c 4 Feb 2004 21:19:04 -0000 1.35 @@ -920,7 +920,6 @@ inlength = strlen(outbuffer) + 1; /* with the encodings supported by our database engines, the converted - string can't be longer than four times the input string */ - /* todo: is this assumption correct? */ - outlength = 4*inlength; + string can't be longer than six times the input string */ + outlength = 6*inlength; orig_outlength = outlength; Index: refdbdnote.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdnote.c,v retrieving revision 1.22 retrieving revision 1.23 diff -u -U2 -r1.22 -r1.23 --- refdbdnote.c 1 Feb 2004 00:36:20 -0000 1.22 +++ refdbdnote.c 4 Feb 2004 21:19:05 -0000 1.23 @@ -2078,7 +2078,6 @@ inlength = strlen(sql_command) + 1; /* with the encodings supported by our database engines, the converted - string can't be longer than four times the input string */ - /* todo: is this assumption correct? 
*/ - outlength = 4*inlength; + string can't be longer than six times the input string */ + outlength = 6*inlength; if ((my_sql_command = (char*)malloc(outlength)) == NULL) { Index: refdbdref.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbdref.c,v retrieving revision 1.60 retrieving revision 1.61 diff -u -U2 -r1.60 -r1.61 --- refdbdref.c 1 Feb 2004 00:36:20 -0000 1.60 +++ refdbdref.c 4 Feb 2004 21:19:05 -0000 1.61 @@ -625,14 +625,14 @@ /* run a character encoding conversion if required */ - if (conv_descriptor && *ris_set) { - inlength = strlen(ris_set) + 1; + if (conv_descriptor && numbyte) { + inlength = numbyte - TERM_LEN; /* with the encodings supported by our database engines, the converted - string can't be longer than four times the input string */ - outlength = 4*inlength; + string can't be longer than six times the input string */ + outlength = 6*inlength; if ((my_ris_set = (char*)malloc(outlength)) == NULL) { ptr_addresult->failure++; sprintf(return_msg, "out of memory\n"); - LOG_PRINT(LOG_WARNING, "failed processing dataset"); + LOG_PRINT(LOG_WARNING, "failed processing dataset: out of memory"); numbyte = tiwrite(ptr_clrequest->fd, return_msg, TERM_YES); if (numbyte == -1) { @@ -2020,5 +2020,5 @@ /* send message to client */ sprintf(return_msg, "ID "ULLSPEC" successfully picked\n", (unsigned long long)(ptr_curr->value)); - iwrite(ptr_clrequest->fd, return_msg, strlen(return_msg)); + tiwrite(ptr_clrequest->fd, return_msg, TERM_NO); } } @@ -4084,9 +4084,10 @@ char* my_sql_command_start = NULL; /* records initial state of my_elvalue */ const char* my_instring = NULL; /* this ptr will be modified by iconv() */ + /* strlen should be ok here as this can't be a multibyte encoding */ inlength = strlen(sql_command) + 1; /* with the encodings supported by our database engines, the converted - string can't be longer than four times the input string */ + string can't be longer than six times the input 
string */ /* todo: is this assumption correct? */ - outlength = 4*inlength; + outlength = 6*inlength; if ((my_sql_command = (char*)calloc(outlength, sizeof(char))) == NULL) { @@ -4138,5 +4139,5 @@ sql_command = my_sql_command_start; sql_command_len = outlength; - result_len = (size_t)(my_sql_command - my_sql_command_start); + result_len = (size_t)(my_sql_command - my_sql_command_start - 1); } else { /* no conversion required */ |
From: Markus H. <mho...@us...> - 2004-02-04 21:09:52
|
Update of /cvsroot/refdb/refdb/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17662 Modified Files: refdbda.c Log Message: fixed wrong TERM_LEN comparison Index: refdbda.c =================================================================== RCS file: /cvsroot/refdb/refdb/src/refdbda.c,v retrieving revision 1.40 retrieving revision 1.41 diff -u -U2 -r1.40 -r1.41 --- refdbda.c 31 Jan 2004 14:50:22 -0000 1.40 +++ refdbda.c 4 Feb 2004 21:07:21 -0000 1.41 @@ -1536,5 +1536,5 @@ while (!nread_done) { numbyte = tread(ptr_clrequest->fd, inbuffer, COMMAND_INBUF_LEN-1); - if (get_trailz(inbuffer, numbyte) < TERM_LEN) { /* if transmission complete */ + if (get_trailz(inbuffer, numbyte) >= TERM_LEN) { /* if transmission complete */ nread_done = 1; } |