[Refdb-cvs] CVS: refdb/src authorinfo.c,1.3,1.4 tokenize.c,1.10,1.11
Status: Beta
Brought to you by:
mhoenicka
|
From: Markus H. <mho...@us...> - 2004-01-06 23:06:40
|
Update of /cvsroot/refdb/refdb/src
In directory sc8-pr-cvs1:/tmp/cvs-serv722
Modified Files:
authorinfo.c tokenize.c
Log Message:
added support for hyphenated double initials in author names
Index: authorinfo.c
===================================================================
RCS file: /cvsroot/refdb/refdb/src/authorinfo.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -U2 -r1.3 -r1.4
--- authorinfo.c 28 Dec 2003 00:43:19 -0000 1.3
+++ authorinfo.c 6 Jan 2004 23:06:36 -0000 1.4
@@ -45,4 +45,5 @@
char* temp_middle;
char* new_temp_author;
+ char bitsandpieces[6];
size_t temp_author_len = 256;
@@ -71,15 +72,35 @@
}
- if ((new_temp_author = mstrcat(temp_author, ptr_ainfo->firstname, &temp_author_len, 0)) == NULL) {
- (*ptr_error)++;
- free(temp_author);
- return NULL;
+ /* we've got to analyze whether this is a full first name or some
+ hyphenated double initial */
+ if (ptr_ainfo->firstname[1] == '-') {
+ *bitsandpieces = *(ptr_ainfo->firstname);
+ *(bitsandpieces + 1) = '.';
+ *(bitsandpieces + 2) = '-';
+ *(bitsandpieces + 3) = *(ptr_ainfo->firstname + 2);
+ *(bitsandpieces + 4) = '.';
+ *(bitsandpieces + 5) = '\0';
+ }
+ else if (*(ptr_ainfo->firstname + 1) == '\0') {
+ *bitsandpieces = *ptr_ainfo->firstname;
+ *(bitsandpieces + 1) = '.';
+ *(bitsandpieces + 2) = '\0';
}
else {
- temp_author = new_temp_author;
+ *bitsandpieces = '\0';
}
- if (ptr_ainfo->firstname[1] == '\0') {
- if ((new_temp_author = mstrcat(temp_author, ".", &temp_author_len, 0)) == NULL) {
+ if (*bitsandpieces) {
+ if ((new_temp_author = mstrcat(temp_author, bitsandpieces, &temp_author_len, 0)) == NULL) {
+ (*ptr_error)++;
+ free(temp_author);
+ return NULL;
+ }
+ else {
+ temp_author = new_temp_author;
+ }
+ }
+ else {
+ if ((new_temp_author = mstrcat(temp_author, ptr_ainfo->firstname, &temp_author_len, 0)) == NULL) {
(*ptr_error)++;
free(temp_author);
@@ -112,16 +133,35 @@
}
- if ((new_temp_author = mstrcat(temp_author, item, &temp_author_len, 0)) == NULL) {
- (*ptr_error)++;
- free(temp_author);
- free(temp_middle);
- return NULL;
+ /* check whether middle name is hyphenated initial */
+ if (*(item + 1) == '-') {
+ *bitsandpieces = *item;
+ *(bitsandpieces + 1) = '.';
+ *(bitsandpieces + 2) = '-';
+ *(bitsandpieces + 3) = *(item + 2);
+ *(bitsandpieces + 4) = '.';
+ *(bitsandpieces + 5) = '\0';
+ }
+ else if (*(item + 1) == '\0') {
+ *bitsandpieces = *item;
+ *(bitsandpieces + 1) = '.';
+ *(bitsandpieces + 2) = '\0';
}
else {
- temp_author = new_temp_author;
+ *bitsandpieces = '\0';
}
- if (item[1] == '\0') {
- if ((new_temp_author = mstrcat(temp_author, ".", &temp_author_len, 0)) == NULL) {
+ if (*bitsandpieces) {
+ if ((new_temp_author = mstrcat(temp_author, bitsandpieces, &temp_author_len, 0)) == NULL) {
+ (*ptr_error)++;
+ free(temp_author);
+ free(temp_middle);
+ return NULL;
+ }
+ else {
+ temp_author = new_temp_author;
+ }
+ }
+ else {
+ if ((new_temp_author = mstrcat(temp_author, item, &temp_author_len, 0)) == NULL) {
(*ptr_error)++;
free(temp_author);
Index: tokenize.c
===================================================================
RCS file: /cvsroot/refdb/refdb/src/tokenize.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -U2 -r1.10 -r1.11
--- tokenize.c 6 Jan 2004 15:41:57 -0000 1.10
+++ tokenize.c 6 Jan 2004 23:06:36 -0000 1.11
@@ -971,11 +971,33 @@
}
- /* now there's two options:
+ /* now there are three options:
- the next char is the first initial, followed by a period
+ - the next char is the first initial of a hyphenated double
+ name
- the next word is the full first name */
if (*(ptr_atoken->first + 1) == '.') {
- *(ptr_atoken->first + 1) = '\0';
- middle = ptr_atoken->first + 2;
+ if (*(ptr_atoken->first + 2) == '-') {
+ /* now we've got to find the end of the hyphenated name, remove
+ the periods, and terminate the string appropriately */
+ middle = ptr_atoken->first + 3;
+ while (*middle && *middle != ' ') {
+ middle++;
+ }
+
+ if (*(middle-1) == '.') {
+ *(middle-1) = '\0';
+ }
+ else if (*middle == ' ') {
+ *middle = '\0';
+ middle++;
+ }
+
+ memmove(ptr_atoken->first + 1, ptr_atoken->first + 2, strlen(ptr_atoken->first + 2)+1);
+ }
+ else {
+ *(ptr_atoken->first + 1) = '\0';
+ middle = ptr_atoken->first + 2;
+ }
}
else if (*(ptr_atoken->first + 1)) { /* string doesn't end here */
@@ -1008,6 +1030,26 @@
/* middle name could be abbreviated, so look for a dot */
if (*(middle + 1) == '.') {
- *(middle + 1) = '\0';
- next_middle = middle + 2;
+ if (*(middle + 2) == '-') {
+ /* now we've got to find the end of the hyphenated name, remove
+ the periods, and terminate the string appropriately */
+ next_middle = middle + 3;
+ while (*next_middle && *next_middle != ' ') {
+ next_middle++;
+ }
+
+ if (*(next_middle-1) == '.') {
+ *(next_middle-1) = '\0';
+ }
+ else if (*next_middle == ' ') {
+ *next_middle = '\0';
+ next_middle++;
+ }
+
+ memmove(middle + 1, middle + 2, strlen(middle + 2)+1);
+ }
+ else {
+ *(middle + 1) = '\0';
+ next_middle = middle + 2;
+ }
}
else {
|