Thread: [Refdb-cvs] CVS: refdb/scripts en2ris.in,NONE,1.1.2.1
Status: Beta
Brought to you by:
mhoenicka
From: Markus H. <mho...@us...> - 2005-03-29 20:23:11
|
## Update of /cvsroot/refdb/refdb/scripts
## In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17253
##
## Added Files:
##   Tag: Release_0_9_5_stable
##   en2ris.in
## Log Message:
##   replaces en2ris.pl.in
##
## --- NEW FILE ---
#!/usr/bin/perl

## en2ris.pl: converts EndNote "RIS" datasets to RIS format
##
## usage: perl en2ris.pl < endnote.ris > outfile.ris
##
## Dependencies: perl 5.6.0 or later (lexical filehandles, 3-arg open)
##               RefDB::Prefs
##               RefDB::Log
##               Text::Iconv
##               Getopt::Std
##               Sys::Syslog
##
## ma...@mh... 2003-04-27
## $Id: en2ris.in,v 1.1.2.1 2005/03/29 20:23:01 mhoenicka Exp $

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

use strict;
use warnings;

## change character encoding on the fly
use Text::Iconv;

## the common RefDB modules
use RefDB::Log;
use RefDB::Prefs;

## use this module to read command line options
use Getopt::Std;

## this one is for syslog (who'd have guessed)
use Sys::Syslog;

## this is for the config file stuff. Stays an empty hash if -q is used,
## so that all lookups below simply fall back to the built-in defaults
my $prefs = {};

## read config file settings
my $confdir = "<sysconfdir>";
my $read_prefs = 1;

## look for -q and -y options. This has to happen before getopts runs
## because these two decide whether and where the config files are read
my $expect_confdir = 0;

foreach my $arg (@ARGV) {
    if ($expect_confdir) {
        ## the previous argument was -y, so this one is the path
        $confdir = $arg;
        $expect_confdir = 0;
    }
    elsif ($arg eq "-y") {
        $expect_confdir = 1;
    }
    elsif ($arg eq "-q") {
        $read_prefs = 0;
    }
}

## read config files
if ($read_prefs) {
    my $home = defined($ENV{'HOME'}) ? $ENV{'HOME'} : "";
    $prefs = RefDB::Prefs::->new("$confdir/en2risrc", "$home/en2risrc");
}

#### variables to hold config options. Will be initialized by whatever
#### was in the config files

## name of the output file, if any. If undef, send data to stdout
my $outfile = $prefs->{"outfile"};

## if f, overwrite; if t, append
my $append = (defined($prefs->{"outappend"})) ? $prefs->{"outappend"} : "f";

## encodings to use for input and output. empty string uses the locale
my $from_enc = (defined($prefs->{"from_enc"})) ? $prefs->{"from_enc"} : "ISO-8859-1";
my $to_enc = (defined($prefs->{"to_enc"})) ? $prefs->{"to_enc"} : "ISO-8859-1";

## logging options
my $logfile = (defined($prefs->{"logfile"})) ? $prefs->{"logfile"} : "/var/log/en2ris.log";
my $loglevel = (defined($prefs->{"loglevel"})) ? $prefs->{"loglevel"} : 6;
## 0 = stderr, 1 = syslog, 2 = file
my $logdest = (defined($prefs->{"logdest"})) ? $prefs->{"logdest"} : 2;

## this hash will receive the command line options
my %opts;

## the switches are:
## -e dest: log destination
## -f enc: input encoding
## -h: prints help
## -l level: log level
## -L file: log file
## -o/-O file: specifies output file for writing/appending
## -q: ignore config file
## -t enc: output encoding
## -y path: set confdir
getopts('e:f:hl:L:o:O:qt:y:', \%opts);

## -h wins over everything else: print the help text and leave
if ($opts{"h"}) {
    print "en2ris.pl turns EndNote \"RIS\" output into RIS\n";
    print "Usage: [perl] en2ris.pl [-e dest] [-f enc] [-h] [-l level] [-L logfile] [(-o|-O) outfile] [-q] [-t enc] [-y path]\n"
        . " Reads EndNote \"RIS\" data from stdin. Output is sent to stdout unless one of the -o/-O options is used\n"
        . "Options: -e dest log destination (stderr|syslog|file)\n"
        . " -f enc input encoding\n"
        . " -h print this help and exit\n"
        . " -l loglevel set log level (0-7)\n"
        . " -L logfile path of custom log file\n"
        . " -o outfile send output to outfile (overwrite)\n"
        . " -O outfile send output to outfile (append)\n"
        . " -q ignore config file\n"
        . " -t enc output encoding\n"
        . " -y path set custom config file path\n";
    exit(0);
}

## command line options override the config file settings. -q and -y
## need no handling here, they were evaluated before getopts
$logdest  = $opts{"e"} if defined($opts{"e"});
$from_enc = $opts{"f"} if defined($opts{"f"});
$loglevel = $opts{"l"} if defined($opts{"l"});
$logfile  = $opts{"L"} if defined($opts{"L"});
$to_enc   = $opts{"t"} if defined($opts{"t"});
$outfile  = $opts{"o"} if defined($opts{"o"});

## -O is checked after -o so that the result is predictable if both
## are given: the -O file is used and it is appended to
if (defined($opts{"O"})) {
    $outfile = $opts{"O"};
    $append = "t";
}

## post-process a few variables
$logdest = RefDB::Log::num_logdest($logdest);
$loglevel = RefDB::Log::num_loglevel($loglevel);

## if we're supposed to write to an output file, try to open it. The
## handle lives at file scope as it must stay open until all input is done
my $out_fh;

if (defined($outfile) && length($outfile) > 0) {
    ## three-argument open keeps odd characters in the file name from
    ## being interpreted as an open mode
    if ($append eq "t") {
        open($out_fh, '>>', $outfile)
            or die "cannot open output file for appending: $outfile\n";
    }
    else {
        open($out_fh, '>', $outfile)
            or die "cannot open output file for overwriting: $outfile\n";
    }

    ## make all print commands send output to this handle
    select $out_fh;
}

## set up logging
my $log = RefDB::Log::->new($logdest, $loglevel, $logfile, "en2ris.pl");

## here the code proper starts

## RIS tags are six characters wide: a two-letter code, two spaces, a
## hyphen, and one more space. The substr($line, 6) calls below rely on it
my $last_tag = "TY  - ";
my $PY = "";
my $Y2 = "";

## initialize character encoding conversion
my $converter = Text::Iconv->new($from_enc, $to_enc);

## counter for datasets
my $set_count = 0;

## this hash helps to convert month names to numbers
my %monthnames = (
    "January"   => "01",
    "February"  => "02",
    "March"     => "03",
    "April"     => "04",
    "May"       => "05",
    "June"      => "06",
    "July"      => "07",
    "August"    => "08",
    "September" => "09",
    "October"   => "10",
    "November"  => "11",
    "December"  => "12",
);

## this hash helps to convert month abbreviations to numbers
my %monthabbrevs = (
    "Jan" => "01",
    "Feb" => "02",
    "Mar" => "03",
    "Apr" => "04",
    "May" => "05",
    "Jun" => "06",
    "Jul" => "07",
    "Aug" => "08",
    "Sep" => "09",
    "Oct" => "10",
    "Nov" => "11",
    "Dec" => "12",
);

## read data from stdin (or from any files left on the command line)
while (my $line = <>) {
    # remove an odd character that EndNote exports once in a while for no
    # good reason. tr removes every occurrence, not just the first one
    $line =~ tr/\035//d;

    if ($line =~ /^(..  - )/) {
        $last_tag = $1;
    }
    elsif ($last_tag eq "KW  - ") {
        # untagged line following a keyword: give it its own KW tag. The
        # line is sent through the converter at the end of the loop body
        # like all other output, so that the encoding stays consistent
        $line = $last_tag . $line;
    }

    if ($line =~ /^SP  - /) {
        # keep only the part in front of the last hyphen (start page)
        $line =~ s/^SP  - (.*)-.*/SP  - $1/;
        $log->log_print("debug", "fixed SP");
    }
    elsif ($line =~ /^EP  - /) {
        if ($line =~ /^EP  - .*-.*/) {
            # keep only the part after the last hyphen (end page)
            $line =~ s/^EP  - .*-(.*)/EP  - $1/;
        }
        else {
            # no page range in here, drop the line
            $line = "";
        }
        $log->log_print("debug", "fixed EP");
    }
    elsif ($line =~ /^PY  - /) {
        # remember the value, it is written when the dataset ends
        chomp $line;
        $PY = substr($line, 6);
        $line = "";
        $log->log_print("debug", "found PY");
    }
    elsif ($line =~ /^Y2  - /) {
        # remember the value, it is merged into PY when the dataset ends
        chomp $line;
        $Y2 = substr($line, 6);
        $line = "";
        $log->log_print("debug", "found Y2");
    }
    elsif ($line =~ /^ER  - /) {
        # dump pubyear string, reset variables for new round
        my $datestring = fix_dates($PY, $Y2);
        emit("PY  - $datestring\n");
        $PY = "";
        $Y2 = "";
        $set_count++;
    }
    elsif ($line =~ /^ID  - /) {
        # informational message about the current dataset
        $log->log_print("info", substr($line, 6));
    }

    emit($line);
}

$log->log_print("info", "converted $set_count datasets");

## done processing all input
$log->close();

## buffered write errors only show up when the file is closed, so check
if (defined($out_fh)) {
    select STDOUT;
    close($out_fh) or die "cannot write output file: $outfile: $!\n";
}

## the end

## converts a piece of text to the output encoding and prints it to the
## currently selected handle. Empty strings are skipped. If the conversion
## fails nothing is printed, but a warning is logged
sub emit {
    my ($text) = @_;

    return if $text eq "";

    my $converted = $converter->convert($text);

    if (!defined($converted)) {
        $log->log_print("warning", "could not convert to $to_enc, skipped: $text");
        return;
    }

    print $converted;
    return;
}

## removes a leading month name or month abbreviation, followed by
## $separator, from the string that $text_ref points to. Full names are
## tried before abbreviations. Returns the two-digit month number, or an
## empty string if the text does not start with a month.
## The loops run over keys() instead of each(): leaving an each() loop
## early does not reset the iterator of the hash, so the next call would
## only see the remaining entries and miss months
sub take_month {
    my ($text_ref, $separator) = @_;

    foreach my $table (\%monthnames, \%monthabbrevs) {
        foreach my $name (keys %{$table}) {
            if (${$text_ref} =~ s!^\Q$name\E$separator!!) {
                return $table->{$name};
            }
        }
    }

    return "";
}

## removes one or two leading digits plus any whitespace after them from
## the string that $text_ref points to. Returns the digits as a two-digit
## day, or an empty string if the text does not start with a digit
sub take_day {
    my ($text_ref) = @_;

    if (${$text_ref} =~ s/^(\d{1,2})\s*//) {
        # day requires exactly two digits
        return sprintf("%02d", $1);
    }

    return "";
}

## this function assumes that the publication year is in the PY field,
## whereas month, date, and otherinfo are in the Y2 field. Two formats
## of the Y2 field are recognized: /month/day/otherinfo, where month
## may either be numeric or a month name/abbrev; or a date like "March 10"
##
## arguments: $PY  contents of the PY field (may be empty)
##            $Y2  contents of the Y2 field (may be empty)
## returns:   the string "year/month/day/otherinfo". Parts that could not
##            be found are left empty; a missing year is logged and
##            replaced by "0000"
sub fix_dates {
    my ($PY, $Y2) = @_;
    my $month;
    my $day = "";
    my $otherinfo;

    # strip leading slashes
    $Y2 =~ s!^/*!!;

    # see whether we have a monthname or monthabbrev in front of a slash
    $month = take_month(\$Y2, "/");

    # if we still don't have a month, use string as numeric
    if ($month eq "") {
        if ($Y2 =~ /^(\d{1,2})/) {
            $month = sprintf("%02d", $1);

            # drop the month field including the slash, if there is one
            $Y2 =~ s!^[^/]*/!!;
        }
    }

    $otherinfo = $Y2;

    # separate day and otherinfo, if any
    if (length($otherinfo)) {
        $otherinfo =~ s!^[\s/]*!!;

        # use first one or two digits as day
        $day = take_day(\$otherinfo);
    }

    if (length($otherinfo)) {
        $otherinfo =~ s!^[\s/]*!!;

        if ($month eq "") {
            # last attempt to find month and date; assume no slashes
            $month = take_month(\$otherinfo, "");

            # maybe there's also a day
            if (length($otherinfo)) {
                $otherinfo =~ s!^\s*!!;
                $day = take_day(\$otherinfo);
            }
        }
    }

    if (!length($PY)) {
        $log->log_print("warning", "found no PY");
        $PY = "0000";
    }

    # assemble return string
    return join("/", $PY, $month, $day, $otherinfo);
}