From: Chas W. <ch...@cm...> - 1999-08-21 22:51:10
|
Here is a diff to the 1.5 version of the dict-misc package that will allow it to format the publicly available Roget's thesaurus. It is not quite perfect yet (a few problems -- mostly related to headwords wrapping to the next line, any ideas?). I downloaded Roget's from ftp://crl.nmsu.edu/CLR/lexica/roget-1911/t1911a.zip but I am not sure if this is the 'canonical' source. [On a related side note, I have started writing a Perl script to convert the ced3 prolog database back to a more dict friendly format -- any interest given the limbo state of the copyright of this database?] --- dictfmt.c.000 Fri Aug 20 11:42:45 1999 +++ dictfmt.c Sat Aug 21 07:19:47 1999 @@ -44,6 +44,7 @@ #define PERIODIC 4 #define HITCHCOCK 5 #define CIA1995 6 +#define ROGET 7 #define BSIZE 10240 @@ -214,13 +215,14 @@ char *s, *d; char *buf; - while ((c = getopt(argc, argv, "jfephDu:s:c:")) != EOF) + while ((c = getopt(argc, argv, "jfephrDu:s:c:")) != EOF) switch (c) { case 'j': type = JARGON; break; case 'f': type = FOLDOC; break; case 'e': type = EASTON; break; case 'p': type = PERIODIC; break; case 'h': type = HITCHCOCK; break; + case 'r': type = ROGET; break; case 'D': ++Debug; break; case 'u': url = optarg; break; case 's': sname = optarg; break; @@ -391,6 +393,54 @@ && buffer[0] == buffer[2] && buffer[0] == buffer[3]) continue; /* Skip lines with *'s and ='s */ + } + break; + case ROGET: //chas + switch (*buffer) { + case ' ': + if (buffer[5] == '#') { + header = 1; + s = &buffer[5]; + if ((s = strchr( s, ' ' ))) { + while (*s == ' ') ++s; + + if (*s == '[') { + for(c=0; c<strlen(s) && s[c]!=']' && s[c]!=')'; ) ++c; + ++c; + s = &s[c]; + } + + while (*s == ' ') ++s; + + for(c=0; c<strlen(s) && s[c] != ' ' && s[c] != '.'; ) ++c; + d = &s[c]; + //d = strchr( s, '.'); + *d = '\0'; +#ifdef notdef + fprintf(stderr, "headword -> %s\n", s); +#endif + fmt_newheadword(s, 0); + + *d = '.'; + } + } + + /* skip lines starting with 12 blank spaces... 
*/ + for(c=0; c<11 && buffer[c] == ' '; ) ++c; + if (c == 11) + continue; + + break; + case '<': + continue; +#ifdef notdef + case '\n': + case '\0': + header = 0; + break; + default: + if (!header) continue; // dont print unless in defn +#endif } break; case PERIODIC: |