You can subscribe to this list here.
2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(132) |
Jul
(50) |
Aug
(172) |
Sep
(87) |
Oct
|
Nov
(1) |
Dec
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(39) |
Oct
(2) |
Nov
|
Dec
|
2004 |
Jan
(47) |
Feb
|
Mar
(11) |
Apr
|
May
|
Jun
|
Jul
|
Aug
(2) |
Sep
|
Oct
(9) |
Nov
|
Dec
|
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(2) |
Oct
|
Nov
|
Dec
|
From: Egon W. <eg...@us...> - 2002-07-29 11:08:43
|
Update of /cvsroot/woc/woc/cgi-bin/email In directory usw-pr-cvs1:/tmp/cvs-serv17626/email Added Files: wo...@sc... Log Message: Added more scripts. --- NEW FILE: wo...@sc... --- #!/usr/local/gnu/bin/perl -- -*-perl-*- # ------------------------------------------------------------ # Define fairly-constants $mailprog = '/usr/ucb/mail'; $recipient = 'wo...@sc...'; $recipientname = 'WOC redactie'; $subject = 'Reactie op WOC'; # Get the input read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); # Split the name-value pairs @pairs = split(/&/, $buffer); foreach $pair (@pairs) { ($name, $value) = split(/=/, $pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; # Uncomment for debugging purposes # print "Setting $naam to $value<P>"; $FORM{$name} = $value; } # Now send mail to $recipient open (MAIL, "|$mailprog -s \"$subject\" -r \"" . $FORM{'username'} . "\" $recipient") || die "Can't open $mailprog!\n\n"; print MAIL "$FORM{'realname'} (email : $FORM{'username'}) van $ENV{'REMOTE_HOST'}\n"; print MAIL "stuurt de volgende reactie over het Woordenboek Organische Chemie:\n"; print MAIL "\n"; print MAIL "$ENV{'REMOTE_EMAIL'}\n"; print MAIL "------------------------------------------------------------\n\n"; print MAIL " $FORM{'comments'} \n"; print MAIL "------------------------------------------------------------\n\n"; print MAIL "\n"; # print MAIL "Hij/zij wil $FORM{'email'} email ontvangen over ontwikkelingen."; close (MAIL); # And hold the closing argument. # Print out what we need print "Content-type: text/html\n\n"; print "<html>\n"; print "<head>\n"; print " <BASE href=\"http://www-woc.sci.kun.nl/\">"; print " <TITLE>Reactie verstuurt.</TITLE>\n"; print "</head>\n"; print "<body bgcolor=white vlink=blue alink=blue link=blue background=\"pictures/system/background.gif\">\n"; print "<SCRIPT>\n"; print " <!--\n"; print " top.frames[1].window.location=\"locatie/reactie_v.html\";\n"; print " // -->\n"; print "</SCRIPT>\n"; print "<p><br>"; print "<H3>Reactie is verzonden.</H3>\n"; print "<ul>"; print "Het volgende bericht is gestuurd naar "; print "$recipientname.\n"; print "<p><b>Naam :</b> $FORM{'realname'}"; print "<p><b>Email :</b> $FORM{'username'}"; print "<p><b>Reactie :</b><br>"; print "<ul>$FORM{'comments'}</ul>"; # print "<p>U heeft aangegeven <i>"; # # if (($FORM{'email'})eq('Ja')){ print "wel";} # else { print "niet"; } # # print "</i> email te willen ontvangen over ontwikkelingen "; # print "van het Woordenboek Organische Chemie.\n"; print "</ul></body>"; print "</html>"; |
From: Egon W. <eg...@us...> - 2002-07-29 10:55:08
|
Update of /cvsroot/woc/woc/cgi-bin/email In directory usw-pr-cvs1:/tmp/cvs-serv14303/email Log Message: Directory /cvsroot/woc/woc/cgi-bin/email added to the repository |
From: Egon W. <eg...@us...> - 2002-07-29 10:55:08
|
Update of /cvsroot/woc/woc/cgi-bin/stats In directory usw-pr-cvs1:/tmp/cvs-serv14303/stats Log Message: Directory /cvsroot/woc/woc/cgi-bin/stats added to the repository |
From: Egon W. <eg...@us...> - 2002-07-29 10:54:01
|
Update of /cvsroot/woc/woc/cgi-bin In directory usw-pr-cvs1:/tmp/cvs-serv13954 Removed Files: Article.pm webinterface.pl Log Message: Oops, wrong directory. --- Article.pm DELETED --- --- webinterface.pl DELETED --- |
From: Egon W. <eg...@us...> - 2002-07-29 10:54:01
|
Update of /cvsroot/woc/woc/cgi-bin/usenet In directory usw-pr-cvs1:/tmp/cvs-serv13954/usenet Added Files: Article.pm webinterface.pl Log Message: Oops, wrong directory. --- NEW FILE: Article.pm --- package Article; use strict; sub new { my $self = {}; $self->{NO} = undef; $self->{ID} = undef; $self->{SUBJECT} = undef; $self->{FROM} = undef; $self->{DATE} = undef; $self->{REFS} = undef; $self->{FUPS} = undef; $self->{THEME} = undef; $self->{BODY} = []; bless($self); return $self; } sub addFup { #my $self = shift; #my $fup = shift; $_[0]->{FUPS} .= " $_[1]"; } sub theme { #my $self = shift; if ($_[1]) { $_[0]->{THEME} = $_[1] }; #= shift }; return $_[0]->{THEME}; } sub fups { #my $self = shift; if ($_[1]) { $_[0]->{FUPS} = $_[1] }; #= shift }; return $_[0]->{FUPS}; } sub refs { #my $self = shift; if ($_[1]) { $_[0]->{REFS} = $_[1] }; #= shift }; return $_[0]->{REFS}; } sub body { my $self = shift; if (@_) { @{$self->{BODY}} = @_ }; return @{$self->{BODY}}; } sub from { #my $self = shift; if ($_[1]) { $_[0]->{FROM} = $_[1] }; #= shift }; return $_[0]->{FROM}; } sub no { #my $self = shift; if ($_[1]) { $_[0]->{NO} = $_[1] }; #= shift }; return $_[0]->{NO}; } sub date { #my $self = shift; if ($_[1]) { $_[0]->{DATE} = $_[1] }; #= shift }; return $_[0]->{DATE}; } sub id { #my $self = shift; if ($_[1]) { $_[0]->{ID} = $_[1] }; #= shift }; return $_[0]->{ID}; } sub subject { #my $self = shift; if ($_[1]) { $_[0]->{SUBJECT} = $_[1] }; #= shift }; return $_[0]->{SUBJECT}; } 1; --- NEW FILE: webinterface.pl --- #!/usr/local/bin/perl -w use strict; use diagnostics; use News::NNTPClient; use Article; use CGI; # consts my $url = "/cgi-bin-woc/usenet/webinterface.pl"; my $server = "nntp-srv.sci.kun.nl"; my $group = "nl.wetenschap"; my $cachedir = "/tmp/woc/usenet"; my @themes = ("chemische verbindingen", "meer info", "synthese", "chemische software", "anders"); # read params my $query = new CGI; my $message = int($query->param("message") || 0); my $command = $query->param("command") || "overzicht"; my $showheader = int($query->param("header") || undef); my $theme = $query->param("theme") || undef; my $tvalue = $query->param("threaded"); my $threaded = 1; $threaded = int($tvalue) if (defined $tvalue); # instantiate client my $client = new News::NNTPClient($server); my ($first, $last) = ($client->group($group)); my $status = ""; my $content = ""; my %articles = (); my %index = (); my %printed = (); my %fups = (); if ($command eq "search") { my $searched = $query->param("query") || ""; my $field = $query->param("field") || "alle"; if ($searched ne "") { # parse articles for (my $i = $first; $i <= $last; $i++) { my $art = getArticleByNo($i); if (defined $art) { $articles{$art->id()} = $art; $index{int($i)} = $art->id(); if (defined $art->refs()) { foreach my $ref (split(' ', $art->refs())) { my $nart = $articles{$ref}; if (defined $nart) { $nart->addFup($art->id()); } } } } } # do search $content .= "<h3>Gezocht naar: $searched</h3>$/<ul>$/"; foreach my $artid (reverse sort keys %index) { my $index = $index{int($artid)}; my $art = $articles{$index}; my $match = 0; if (($field eq "onderwerp" || $field eq "alle") && (defined (my $subject = $art->subject()))) { $match = 1 if ($subject =~ /$searched/ig); } if (!$match && ($field eq "schrijver" || $field eq "alle") && (defined (my $author = $art->from()))) { $match = 1 if ($author =~ /$searched/ig); } if (!$match && ($field eq "datum" || $field eq "alle") && (defined (my $date = $art->date()))) { $match = 1 if ($date =~ /$searched/ig); } if (!$match && ($field eq "bericht" || $field eq "alle") && (defined (my @text = $art->body()))) { foreach my $line (@text) { #$content .= $line; $match = 1 if ($line =~ /$searched/ig); } } if ($match == 1) { $content .= printArticle($index); } } $content .= "</ul>$/"; } else { $content .= "<H3>Zoeken:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"search\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Veld:</TD>$/"; $content .= " <TD><SELECT name=\"field\" size=\"1\">$/"; $content .= " <OPTION value=\"onderwerp\">onderwerp</OPTION>$/"; $content .= " <OPTION value=\"schrijver\">schrijver</OPTION>$/"; $content .= " <OPTION value=\"bericht\">bericht</OPTION>$/"; $content .= " <OPTION value=\"datum\">datum</OPTION>$/"; $content .= " <OPTION SELECTED value=\"alle\">alle</OPTION>$/"; $content .= " </SELECT></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Zoekwoord:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"query\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Zoeken\">$/"; $content .= " </FORM>$/"; } $content .= "<center>"; $content .= "[<a href=\"$url\">overzicht</a>] $/"; $content .= "[<a href=\"$url?command=search\">zoeken</a>] $/" if ($searched ne ""); $content .= "</center>"; } elsif (($command eq "postquest") && $client->postok()) { my $name = $query->param("name") || ""; my $email = $query->param("email") || ""; my $qsubject = $query->param("qsubject") || ""; my $react = $query->param("content") || ""; my $theme = $query->param("theme") || "Anders"; # email should be checked! # post message my @header = ("Newsgroups: $group", "Subject: $qsubject", "From: $email", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "X-WOC-Theme: $theme"); my @reactbody = ("$react"); my $status = $client->post(@header, "", @reactbody); # give response if ($status) { $content .= "<h3>Verstuurd:</h3>$/<P>$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD width=\"25%\">Naam:</TD>$/"; $content .= " <TD>$name</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD>$email</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>$qsubject</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Vraag:</TD>$/"; $content .= " <TD>$react</TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; } else { $content .= "<H3>Vraag niet verstuurd:</h3>$/"; $content .= "<p>" . $client->message() . "$/"; } $content .= "<center>"; $content .= "[<a href=\"$url\">overzicht</a>] $/"; $content .= "</center>"; } elsif (($command eq "postreact") && $message && $client->postok()) { my $art = getArticleByNo($message); my $name = $query->param("name") || ""; my $email = $query->param("email") || ""; my $react = $query->param("content") || ""; # email should be checked! # post message my @header = (); if (defined $art->theme()) { @header = ("Newsgroups: $group", "Subject: Re: " . $art->subject(), "From: $email ($name)", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "References: " . $art->refs() . " " . $art->id(), "X-WOC-Theme: " . $art->theme()); } else { @header = ("Newsgroups: $group", "Subject: Re: " . $art->subject(), "From: $email ($name)", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "References: " . $art->refs() . " " . $art->id()); } my @reactbody = ("$react"); my $status = $client->post(@header, "", @reactbody); # give response if ($status) { $content .= "<h3>Verstuurd:</h3>$/<P>$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD width=\"25%\">Naam:</TD>$/"; $content .= " <TD>$name</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD>$email</TD>$/"; $content .= " </TR>$/"; if (defined $art->theme()) { $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD>" . $art->theme() . "</TD>$/"; $content .= " </TR>$/"; } $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>Re: " . $art->subject() . "</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Reactie:</TD>$/"; $content .= " <TD>$react</TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; } else { $content .= "<H3>Bericht niet verstuurd:</h3>$/"; $content .= "<p>" . $client->message() . "$/"; } $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif ($command eq "getquest") { $content .= "<h3>Post vraag:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"postquest\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Naam:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"name\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"email\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD><SELECT name=\"theme\" size=\"1\">$/"; foreach my $atheme (@themes) { $content .= " <OPTION value=\"$atheme\">$atheme</OPTION>$/"; } $content .= " </SELECT></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"qsubject\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Vraag:</TD>$/"; $content .= " <TD><TEXTAREA NAME=\"content\" COLS=\"50\" ROWS=\"10\"></TEXTAREA></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Verstuur\">$/"; $content .= " <INPUT TYPE=\"Reset\" VALUE=\"Wis\">$/"; $content .= " </FORM>$/"; $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif (($command eq "getreact") && $message) { my $art = getArticleByNo($message); $content .= "<h3>Reactie op: " . $art->subject() . "</h3>$/"; $content .= "<p>" . addBreaks($art->body()) . "</p><hr />$/"; $content .= "<h3>Reactie:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"postreact\">$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"message\" VALUE=\"$message\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Naam:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"name\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"email\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; if (defined $art->theme()) { $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD>" . $art->theme() . "</TD>$/"; $content .= " </TR>$/"; } $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>Re: " . $art->subject() . "</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Reactie:</TD>$/"; $content .= " <TD><TEXTAREA NAME=\"content\" COLS=\"50\" ROWS=\"10\"></TEXTAREA></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Verstuur\">$/"; $content .= " <INPUT TYPE=\"Reset\" VALUE=\"Wis\">$/"; $content .= " </FORM>$/"; $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif ($message) { my $art = getArticleByNo($message); $content .= "<h3>" . $art->subject() . "</h3>\n"; if ($showheader) { $content .= "<b>$/"; $content .= addBreaks($client->head($message)); $content .= "</b>$/"; } $content .= "<p>" . addBreaks($art->body()); $content .= "<center>[<a href=\"$url\">overzicht</a>] "; $content .= "[<a href=\"$url?command=search\">zoeken</a>] "; if ($showheader) { $content .= "[<a href=\"$url?message=$message\">geen header</a>] "; } else { $content .= "[<a href=\"$url?message=$message&header=1\">header</a>] "; } if ($client->postok()) { $content .= "[<a href=\"$url?command=getreact&message=$message\">reageer</a>]</center>"; } } else { my $max = $query->param("max") || ($last + 1); my $oldmax = $query->param("max") || 0; my $needed = 10; $needed = 20 if (!$threaded); my $counter = $needed; # parse articles my $i = $max - 1; while ($i > $first && $counter) { my $art = getArticleByNo($i); if (defined $art) { my $id = $art->id(); $articles{$id} = $art; $index{$i} = $id; if (defined $art->refs()) { foreach my $ref (split(' ', $art->refs())) { if ($fups{$ref}) { $fups{$ref} = "$id " . $fups{$ref}; } else { $fups{$ref} = $id; } } } else { if ($theme) { $counter-- if ($art->theme() eq $theme); } else { $counter--; } } } $i--; } # possibly show theme's my $link = ""; my $linktext = ""; if (defined $theme) { $content .= "<center>$/"; my $atheme = $theme; $atheme = "Alle" if ($theme eq "*"); $content .= "<h3>$atheme</h3>$/"; foreach my $atheme (@themes) { $link = "$url?command=overzicht&theme=$atheme"; $link =~ s/\ /\%20/g; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } $content .= "[<a href=\"$link\">$atheme</a>] "; } $content .= "[<a href=\"$url?command=overzicht&theme=*\">alle</a>] "; $content .= "</center><p>$/"; } # show last 15 articles $counter = $needed; $content .= "<ul>$/"; foreach my $artid (reverse sort keys %index) { if ($counter > 0) { my $index = $index{int($artid)}; my $art = $articles{$index}; my $print = 1; if (defined $theme) { $print = 0; if (defined (my $arttheme = $art->theme())) { if ($theme eq "*" || $theme eq $arttheme) { $print = 1; } } } if ($print) { if ($threaded && (!defined $art->refs())) { $content .= printThreadedArticle($index); $counter--; } elsif (!$threaded) { $content .= printArticle($index); $counter--; } } $max = $artid; } } $content .= "</ul>$/"; $content .= "<p><center>"; $content .= "[<a href=\"$url?command=getquest\">stel vraag</a>] "; $content .= "[<a href=\"$url?command=search\">zoeken</a>] "; $link = ""; if ($max > $first) { $link = "$url?command=overzicht&max=$max"; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } $content .= "[<a href=\"$link\">oudere berichten</a>] "; } $link = "$url?command=overzicht"; $linktext = "threading"; if ($threaded) { $linktext = "geen $linktext"; $link .= "&threaded=0"; } else { $link .= "&threaded=1"; } if ($theme) { $link .= "&theme=$theme"; } $link .= "&max=$oldmax" if ($oldmax); $content .= "[<a href=\"$link\">$linktext</a>] "; $link = "$url?command=overzicht"; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } if ($theme) { $linktext = "geen thema's"; } else { $linktext = "thema's"; $link .= "&theme=*"; } $content .= "[<a href=\"$link\">$linktext</a>] "; $content .= "</center>"; } # print output print <<END; Content-type: text/html <html> <head> <title>UseNet Interface: $group</title> </head> <body bgcolor="white"> <h1>UseNet Interface: $group</h1><hr /> $status <p>$content</p><hr /> </body> </html> END sub getArticleByNo() { my ($no) = shift @_; my $file = "$cachedir/$group.$no"; $status = "<!-- ? files in cache ($cachedir)-->" if ($status eq ""); my @article = (); if (-e $file) { open(CACHEFILE, "<$file"); @article = <CACHEFILE>; close(CACHEFILE); } else { @article = $client->article($no); return undef if (!@article); #test if cache exists `chmod 777 $cachedir`; mkdir $cachedir, 777 if (!-e $cachedir); #save to cache open(CACHEFILE, ">$file"); foreach my $line (@article) { print CACHEFILE $line; } close(CACHEFILE); } # parse article my $art = Article->new(); $art->no($no); my @body = (); my $isbody = 0; foreach my $line (@article) { if ($isbody) { push(@body, $line); } elsif ($line =~ /Subject:\s*(.*)/) { $art->subject($1); } elsif ($line =~ /Date:\s*(.*)/) { $art->date($1); } elsif ($line =~ /From:\s*(.*)/) { $art->from($1); } elsif ($line =~ /References:\s*(.*)/) { $art->refs($1); } elsif ($line =~ /Message-ID:\s*(.*)/) { $art->id($1); } elsif ($line =~ /X-WOC-Theme:\s*(.*)/) { $art->theme($1); } elsif ($line =~ /^(\n|\r)/) { $isbody = 1; } } $art->body(@body); return $art; } sub addBreaks() { my @body = @_; my $result = ""; foreach my $line (@body) { $line =~ s/\n/<br>/g; $result .= "$line\n"; } return $result; } sub printArticle() { my $artid = shift @_; my $art = $articles{$artid}; my $content = ""; if (!defined $printed{$artid} && (defined $art)) { $content .= "<li><a href=\"$url?message=" . $art->no() . "\">" . $art->subject() . "</a> " . $art->from() . " (<i>" . $art->date() . "</i>)$/"; $printed{$artid} = "yes"; }; return $content; } sub printThreadedArticle() { my $artid = shift @_; my $art = $articles{$artid}; my $content = ""; if (defined $art) { if (!defined $printed{$artid}) { $content .= "<li><a href=\"$url?message=" . $art->no() . "\">" . $art->subject() . "</a> " . $art->from() . " (<i>" . $art->date() . "</i>)$/"; $printed{$artid} = "yes"; if (defined $fups{$artid}) { $content .= "<ul>$/"; foreach my $fup (split(' ', $fups{$artid})) { $content .= printThreadedArticle($fup); } $content .= "</ul>$/"; } } } else { $content .= "<li>article (" . $artid. ") not found$/"; } return $content; } |
From: Egon W. <eg...@us...> - 2002-07-29 10:53:19
|
Update of /cvsroot/woc/woc/cgi-bin/usenet In directory usw-pr-cvs1:/tmp/cvs-serv13801/usenet Log Message: Directory /cvsroot/woc/woc/cgi-bin/usenet added to the repository |
From: Egon W. <eg...@us...> - 2002-07-29 10:53:05
|
Update of /cvsroot/woc/woc/cgi-bin In directory usw-pr-cvs1:/tmp/cvs-serv13718 Added Files: Article.pm webinterface.pl Log Message: Added some stuff. --- NEW FILE: Article.pm --- package Article; use strict; sub new { my $self = {}; $self->{NO} = undef; $self->{ID} = undef; $self->{SUBJECT} = undef; $self->{FROM} = undef; $self->{DATE} = undef; $self->{REFS} = undef; $self->{FUPS} = undef; $self->{THEME} = undef; $self->{BODY} = []; bless($self); return $self; } sub addFup { #my $self = shift; #my $fup = shift; $_[0]->{FUPS} .= " $_[1]"; } sub theme { #my $self = shift; if ($_[1]) { $_[0]->{THEME} = $_[1] }; #= shift }; return $_[0]->{THEME}; } sub fups { #my $self = shift; if ($_[1]) { $_[0]->{FUPS} = $_[1] }; #= shift }; return $_[0]->{FUPS}; } sub refs { #my $self = shift; if ($_[1]) { $_[0]->{REFS} = $_[1] }; #= shift }; return $_[0]->{REFS}; } sub body { my $self = shift; if (@_) { @{$self->{BODY}} = @_ }; return @{$self->{BODY}}; } sub from { #my $self = shift; if ($_[1]) { $_[0]->{FROM} = $_[1] }; #= shift }; return $_[0]->{FROM}; } sub no { #my $self = shift; if ($_[1]) { $_[0]->{NO} = $_[1] }; #= shift }; return $_[0]->{NO}; } sub date { #my $self = shift; if ($_[1]) { $_[0]->{DATE} = $_[1] }; #= shift }; return $_[0]->{DATE}; } sub id { #my $self = shift; if ($_[1]) { $_[0]->{ID} = $_[1] }; #= shift }; return $_[0]->{ID}; } sub subject { #my $self = shift; if ($_[1]) { $_[0]->{SUBJECT} = $_[1] }; #= shift }; return $_[0]->{SUBJECT}; } 1; --- NEW FILE: webinterface.pl --- #!/usr/local/bin/perl -w use strict; use diagnostics; use News::NNTPClient; use Article; use CGI; # consts my $url = "/cgi-bin-woc/usenet/webinterface.pl"; my $server = "nntp-srv.sci.kun.nl"; my $group = "nl.wetenschap"; my $cachedir = "/tmp/woc/usenet"; my @themes = ("chemische verbindingen", "meer info", "synthese", "chemische software", "anders"); # read params my $query = new CGI; my $message = int($query->param("message") || 0); my $command = $query->param("command") || "overzicht"; my $showheader = int($query->param("header") || undef); my $theme = $query->param("theme") || undef; my $tvalue = $query->param("threaded"); my $threaded = 1; $threaded = int($tvalue) if (defined $tvalue); # instantiate client my $client = new News::NNTPClient($server); my ($first, $last) = ($client->group($group)); my $status = ""; my $content = ""; my %articles = (); my %index = (); my %printed = (); my %fups = (); if ($command eq "search") { my $searched = $query->param("query") || ""; my $field = $query->param("field") || "alle"; if ($searched ne "") { # parse articles for (my $i = $first; $i <= $last; $i++) { my $art = getArticleByNo($i); if (defined $art) { $articles{$art->id()} = $art; $index{int($i)} = $art->id(); if (defined $art->refs()) { foreach my $ref (split(' ', $art->refs())) { my $nart = $articles{$ref}; if (defined $nart) { $nart->addFup($art->id()); } } } } } # do search $content .= "<h3>Gezocht naar: $searched</h3>$/<ul>$/"; foreach my $artid (reverse sort keys %index) { my $index = $index{int($artid)}; my $art = $articles{$index}; my $match = 0; if (($field eq "onderwerp" || $field eq "alle") && (defined (my $subject = $art->subject()))) { $match = 1 if ($subject =~ /$searched/ig); } if (!$match && ($field eq "schrijver" || $field eq "alle") && (defined (my $author = $art->from()))) { $match = 1 if ($author =~ /$searched/ig); } if (!$match && ($field eq "datum" || $field eq "alle") && (defined (my $date = $art->date()))) { $match = 1 if ($date =~ /$searched/ig); } if (!$match && ($field eq "bericht" || $field eq "alle") && (defined (my @text = $art->body()))) { foreach my $line (@text) { #$content .= $line; $match = 1 if ($line =~ /$searched/ig); } } if ($match == 1) { $content .= printArticle($index); } } $content .= "</ul>$/"; } else { $content .= "<H3>Zoeken:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"search\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Veld:</TD>$/"; $content .= " <TD><SELECT name=\"field\" size=\"1\">$/"; $content .= " <OPTION value=\"onderwerp\">onderwerp</OPTION>$/"; $content .= " <OPTION value=\"schrijver\">schrijver</OPTION>$/"; $content .= " <OPTION value=\"bericht\">bericht</OPTION>$/"; $content .= " <OPTION value=\"datum\">datum</OPTION>$/"; $content .= " <OPTION SELECTED value=\"alle\">alle</OPTION>$/"; $content .= " </SELECT></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Zoekwoord:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"query\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Zoeken\">$/"; $content .= " </FORM>$/"; } $content .= "<center>"; $content .= "[<a href=\"$url\">overzicht</a>] $/"; $content .= "[<a href=\"$url?command=search\">zoeken</a>] $/" if ($searched ne ""); $content .= "</center>"; } elsif (($command eq "postquest") && $client->postok()) { my $name = $query->param("name") || ""; my $email = $query->param("email") || ""; my $qsubject = $query->param("qsubject") || ""; my $react = $query->param("content") || ""; my $theme = $query->param("theme") || "Anders"; # email should be checked! # post message my @header = ("Newsgroups: $group", "Subject: $qsubject", "From: $email", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "X-WOC-Theme: $theme"); my @reactbody = ("$react"); my $status = $client->post(@header, "", @reactbody); # give response if ($status) { $content .= "<h3>Verstuurd:</h3>$/<P>$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD width=\"25%\">Naam:</TD>$/"; $content .= " <TD>$name</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD>$email</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>$qsubject</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Vraag:</TD>$/"; $content .= " <TD>$react</TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; } else { $content .= "<H3>Vraag niet verstuurd:</h3>$/"; $content .= "<p>" . $client->message() . "$/"; } $content .= "<center>"; $content .= "[<a href=\"$url\">overzicht</a>] $/"; $content .= "</center>"; } elsif (($command eq "postreact") && $message && $client->postok()) { my $art = getArticleByNo($message); my $name = $query->param("name") || ""; my $email = $query->param("email") || ""; my $react = $query->param("content") || ""; # email should be checked! # post message my @header = (); if (defined $art->theme()) { @header = ("Newsgroups: $group", "Subject: Re: " . $art->subject(), "From: $email ($name)", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "References: " . $art->refs() . " " . $art->id(), "X-WOC-Theme: " . $art->theme()); } else { @header = ("Newsgroups: $group", "Subject: Re: " . $art->subject(), "From: $email ($name)", "X-Newsreader: \"WOC's UseNet WebInterface: http://www.sci.kun.nl/woc/\"", "References: " . $art->refs() . " " . $art->id()); } my @reactbody = ("$react"); my $status = $client->post(@header, "", @reactbody); # give response if ($status) { $content .= "<h3>Verstuurd:</h3>$/<P>$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD width=\"25%\">Naam:</TD>$/"; $content .= " <TD>$name</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD>$email</TD>$/"; $content .= " </TR>$/"; if (defined $art->theme()) { $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD>" . $art->theme() . "</TD>$/"; $content .= " </TR>$/"; } $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>Re: " . $art->subject() . "</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Reactie:</TD>$/"; $content .= " <TD>$react</TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; } else { $content .= "<H3>Bericht niet verstuurd:</h3>$/"; $content .= "<p>" . $client->message() . "$/"; } $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif ($command eq "getquest") { $content .= "<h3>Post vraag:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"postquest\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Naam:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"name\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"email\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD><SELECT name=\"theme\" size=\"1\">$/"; foreach my $atheme (@themes) { $content .= " <OPTION value=\"$atheme\">$atheme</OPTION>$/"; } $content .= " </SELECT></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"qsubject\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Vraag:</TD>$/"; $content .= " <TD><TEXTAREA NAME=\"content\" COLS=\"50\" ROWS=\"10\"></TEXTAREA></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Verstuur\">$/"; $content .= " <INPUT TYPE=\"Reset\" VALUE=\"Wis\">$/"; $content .= " </FORM>$/"; $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif (($command eq "getreact") && $message) { my $art = getArticleByNo($message); $content .= "<h3>Reactie op: " . $art->subject() . "</h3>$/"; $content .= "<p>" . addBreaks($art->body()) . "</p><hr />$/"; $content .= "<h3>Reactie:</h3>$/"; $content .= "<p><FORM METHOD=\"POST\" ACTION=\"$url\">$/$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"command\" VALUE=\"postreact\">$/"; $content .= " <INPUT TYPE=\"Hidden\" NAME=\"message\" VALUE=\"$message\">$/"; $content .= " <TABLE width=\"100%\">$/"; $content .= " <TR>$/"; $content .= " <TD>Naam:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"name\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Email:</TD>$/"; $content .= " <TD><INPUT TYPE=\"Text\" NAME=\"email\" SIZE=\"50\" VALUE=\"\"></TD>$/"; $content .= " </TR>$/"; if (defined $art->theme()) { $content .= " <TR>$/"; $content .= " <TD>Thema:</TD>$/"; $content .= " <TD>" . $art->theme() . "</TD>$/"; $content .= " </TR>$/"; } $content .= " <TR>$/"; $content .= " <TD>Onderwerp:</TD>$/"; $content .= " <TD>Re: " . $art->subject() . "</TD>$/"; $content .= " </TR>$/"; $content .= " <TR>$/"; $content .= " <TD>Reactie:</TD>$/"; $content .= " <TD><TEXTAREA NAME=\"content\" COLS=\"50\" ROWS=\"10\"></TEXTAREA></TD>$/"; $content .= " </TR>$/"; $content .= " </TABLE>$/"; $content .= " <INPUT TYPE=\"Submit\" VALUE=\"Verstuur\">$/"; $content .= " <INPUT TYPE=\"Reset\" VALUE=\"Wis\">$/"; $content .= " </FORM>$/"; $content .= "<center>[<a href=\"$url\">overzicht</a>]</center>"; } elsif ($message) { my $art = getArticleByNo($message); $content .= "<h3>" . $art->subject() . "</h3>\n"; if ($showheader) { $content .= "<b>$/"; $content .= addBreaks($client->head($message)); $content .= "</b>$/"; } $content .= "<p>" . addBreaks($art->body()); $content .= "<center>[<a href=\"$url\">overzicht</a>] "; $content .= "[<a href=\"$url?command=search\">zoeken</a>] "; if ($showheader) { $content .= "[<a href=\"$url?message=$message\">geen header</a>] "; } else { $content .= "[<a href=\"$url?message=$message&header=1\">header</a>] "; } if ($client->postok()) { $content .= "[<a href=\"$url?command=getreact&message=$message\">reageer</a>]</center>"; } } else { my $max = $query->param("max") || ($last + 1); my $oldmax = $query->param("max") || 0; my $needed = 10; $needed = 20 if (!$threaded); my $counter = $needed; # parse articles my $i = $max - 1; while ($i > $first && $counter) { my $art = getArticleByNo($i); if (defined $art) { my $id = $art->id(); $articles{$id} = $art; $index{$i} = $id; if (defined $art->refs()) { foreach my $ref (split(' ', $art->refs())) { if ($fups{$ref}) { $fups{$ref} = "$id " . $fups{$ref}; } else { $fups{$ref} = $id; } } } else { if ($theme) { $counter-- if ($art->theme() eq $theme); } else { $counter--; } } } $i--; } # possibly show theme's my $link = ""; my $linktext = ""; if (defined $theme) { $content .= "<center>$/"; my $atheme = $theme; $atheme = "Alle" if ($theme eq "*"); $content .= "<h3>$atheme</h3>$/"; foreach my $atheme (@themes) { $link = "$url?command=overzicht&theme=$atheme"; $link =~ s/\ /\%20/g; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } $content .= "[<a href=\"$link\">$atheme</a>] "; } $content .= "[<a href=\"$url?command=overzicht&theme=*\">alle</a>] "; $content .= "</center><p>$/"; } # show last 15 articles $counter = $needed; $content .= "<ul>$/"; foreach my $artid (reverse sort keys %index) { if ($counter > 0) { my $index = $index{int($artid)}; my $art = $articles{$index}; my $print = 1; if (defined $theme) { $print = 0; if (defined (my $arttheme = $art->theme())) { if ($theme eq "*" || $theme eq $arttheme) { $print = 1; } } } if ($print) { if ($threaded && (!defined $art->refs())) { $content .= printThreadedArticle($index); $counter--; } elsif (!$threaded) { $content .= printArticle($index); $counter--; } } $max = $artid; } } $content .= "</ul>$/"; $content .= "<p><center>"; $content .= "[<a href=\"$url?command=getquest\">stel vraag</a>] "; $content .= "[<a href=\"$url?command=search\">zoeken</a>] "; $link = ""; if ($max > $first) { $link = "$url?command=overzicht&max=$max"; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } $content .= "[<a href=\"$link\">oudere berichten</a>] "; } $link = "$url?command=overzicht"; $linktext = "threading"; if ($threaded) { $linktext = "geen $linktext"; $link .= "&threaded=0"; } else { $link .= "&threaded=1"; } if ($theme) { $link .= "&theme=$theme"; } $link .= "&max=$oldmax" if ($oldmax); $content .= "[<a href=\"$link\">$linktext</a>] "; $link = "$url?command=overzicht"; if ($threaded) { $link .= "&threaded=1"; } else { $link .= "&threaded=0"; } if ($theme) { $linktext = "geen thema's"; } else { $linktext = "thema's"; $link .= "&theme=*"; } $content .= "[<a href=\"$link\">$linktext</a>] "; $content .= "</center>"; } # print output print <<END; Content-type: text/html <html> <head> <title>UseNet Interface: $group</title> </head> <body bgcolor="white"> <h1>UseNet Interface: $group</h1><hr /> $status <p>$content</p><hr /> </body> </html> END sub getArticleByNo() { my ($no) = shift @_; my $file = "$cachedir/$group.$no"; $status = "<!-- ? files in cache ($cachedir)-->" if ($status eq ""); my @article = (); if (-e $file) { open(CACHEFILE, "<$file"); @article = <CACHEFILE>; close(CACHEFILE); } else { @article = $client->article($no); return undef if (!@article); #test if cache exists `chmod 777 $cachedir`; mkdir $cachedir, 777 if (!-e $cachedir); #save to cache open(CACHEFILE, ">$file"); foreach my $line (@article) { print CACHEFILE $line; } close(CACHEFILE); } # parse article my $art = Article->new(); $art->no($no); my @body = (); my $isbody = 0; foreach my $line (@article) { if ($isbody) { push(@body, $line); } elsif ($line =~ /Subject:\s*(.*)/) { $art->subject($1); } elsif ($line =~ /Date:\s*(.*)/) { $art->date($1); } elsif ($line =~ /From:\s*(.*)/) { $art->from($1); } elsif ($line =~ /References:\s*(.*)/) { $art->refs($1); } elsif ($line =~ /Message-ID:\s*(.*)/) { $art->id($1); } elsif ($line =~ /X-WOC-Theme:\s*(.*)/) { $art->theme($1); } elsif ($line =~ /^(\n|\r)/) { $isbody = 1; } } $art->body(@body); return $art; } sub addBreaks() { my @body = @_; my $result = ""; foreach my $line (@body) { $line =~ s/\n/<br>/g; $result .= "$line\n"; } return $result; } sub printArticle() { my $artid = shift @_; my $art = $articles{$artid}; my $content = ""; if (!defined $printed{$artid} && (defined $art)) { $content .= "<li><a href=\"$url?message=" . $art->no() . "\">" . $art->subject() . "</a> " . $art->from() . " (<i>" . $art->date() . "</i>)$/"; $printed{$artid} = "yes"; }; return $content; } sub printThreadedArticle() { my $artid = shift @_; my $art = $articles{$artid}; my $content = ""; if (defined $art) { if (!defined $printed{$artid}) { $content .= "<li><a href=\"$url?message=" . $art->no() . "\">" . $art->subject() . "</a> " . $art->from() . " (<i>" . $art->date() . "</i>)$/"; $printed{$artid} = "yes"; if (defined $fups{$artid}) { $content .= "<ul>$/"; foreach my $fup (split(' ', $fups{$artid})) { $content .= printThreadedArticle($fup); } $content .= "</ul>$/"; } } } else { $content .= "<li>article (" . $artid. ") not found$/"; } return $content; } |
From: Egon W. <eg...@us...> - 2002-07-29 10:53:05
|
Update of /cvsroot/woc/woc/cgi-bin/ssi In directory usw-pr-cvs1:/tmp/cvs-serv13718/ssi Added Files: laatste_tien.pl topicmap.pl Log Message: Added some stuff. --- NEW FILE: laatste_tien.pl --- #!/usr/local/bin/perl -w # # Shows the last ten items that has been added or changed AND for which an *.shtml file # is available. # # updated: 25-11-99 -> use real names instead of filenames, uses NAME Like... # 07-04-01 -> use CODE instead of filenames # use strict; use diagnostics; print "Content-type: text/html\n\n"; my $root = "/vol/www/woc/web-docs"; my $dir = "/vol/www/woc/data/wml"; my @files = `ls -t1 $dir/*.xml`; my $titel; my $code; my $i = 0; while ( $i < 10) { my $file = shift @files; $file =~ s/.*?([\w|\-|\_]*).xml\n/$1/; if (-e "$dir/$file.xml" && -r "$dir/$file.xml") { open (FILE, "<$dir/$file.xml") || die "Error: $dir/$file.xml not found!"; $titel = ""; $code = ""; while (<FILE>) { my $line = $_; if ($line =~ m/ITEM.*NAME=\"(.*?)\"/i) { $titel = $1; } if ($line =~ m/ITEM.*CODE=\"(.*?)\"/i) { $code = $1; } } close(FILE); if ( $titel && -e "$root/gui/items/$code.shtml") { $i++; print "<a href=\"http://www-woc.sci.kun.nl/gui/items/$code.shtml\">$titel</a><br />$/"; } else { # print "<!-- not found $code (was $titel, $file.xml) -->$/"; } } else { # print "<!-- cannot read/open $file -->$/"; } } --- NEW FILE: topicmap.pl --- #!/usr/local/bin/perl -w use strict; ## global vars ## my $associations_indexfile = "../../bin/topic/associations_index"; my $xmlfiles_indexfile = "../../bin/topic/xmlfiles_index"; my $shtmlfiles_indexfile = "../../bin/topic/shtmlfiles_index"; my $wmldir = "../../data/wml"; my $verbose = ""; # False my $debug = ""; # False ### MAIN ### print "Content-type: text/plain\n\n\n"; my $associations_index = {()}; my $xmlfiles_index = {()}; my $shtmlfiles_index = {()}; if (@ARGV) { while ($ARGV[0] =~ /^\-/) { if ($ARGV[0] =~ /^\-d/i) { $debug = "Active"; shift @ARGV; } if ($ARGV[0] =~ /^\-v/i) { $verbose = "Active"; shift @ARGV; } } $associations_index = &get_associations_index ($associations_indexfile); $xmlfiles_index = &get_xmlfiles_index ($xmlfiles_indexfile); $shtmlfiles_index = &get_shtmlfiles_index ($shtmlfiles_indexfile); foreach my $file (@ARGV) { &process_file ("$wmldir/$file", $associations_index, $xmlfiles_index, $shtmlfiles_index); } } else { die "Usage: $0 [-v[erbose]] [-d[ebug]] <xml-files>\n"; } ### END of MAIN ### sub get_associations_index { my %associations_index = (); if (open (INDEXFILE, $_[0])) { my @indexfile = <INDEXFILE>; foreach my $line (@indexfile) { my ($assoc, @line) = split (/\s+/, $line); $associations_index{$assoc} = join (" ", @line); } } else { warn "The associations indexfile $_[0] cannot be read!\n"; } return \%associations_index; } sub get_xmlfiles_index { my %xmlfiles_index = (); if (open (INDEXFILE, $_[0])) { my @indexfile = <INDEXFILE>; foreach my $line (@indexfile) { my ($xmlfile, @name) = split (/\s+/, $line); $xmlfiles_index{join (" ", @name)} = $xmlfile; } } else { warn "The xmlfiles indexfile $_[0] cannot be read!\n"; } return \%xmlfiles_index; } sub get_shtmlfiles_index { my %shtmlfiles_index = (); if (open (INDEXFILE, $_[0])) { my @indexfile = <INDEXFILE>; foreach my $line (@indexfile) { my ($shtmlfile, @name) = split (/\s+/, $line); $shtmlfiles_index{join (" ", @name)} = $shtmlfile; } } else { warn "The shtmlfiles indexfile $_[0] cannot be read!\n"; } return \%shtmlfiles_index; } sub process_file { my ($file, $associations_index, $xmlfiles_index, $shtmlfiles_index) = @_; ## get topicname and associations ## my $topicname = ""; my $associations = []; if (open (XMLFILE, $_[0])) { my @xmlfile = <XMLFILE>; while (@xmlfile && $xmlfile[0] !~ /<ITEM/i) { shift @xmlfile; print "current line=$xmlfile[0]" if $debug; } $topicname = $1 if (@xmlfile && $xmlfile[0] =~ /NAME="(.+?)"/i); while (@xmlfile) { while (@xmlfile && $xmlfile[0] !~ /<tm:assoc\s/i) { shift @xmlfile; print "current line=$xmlfile[0]" if $debug && @xmlfile; } my $assoc = ""; if (@xmlfile && $xmlfile[0] =~ /id="(.+?)"/i) { $assoc = $1; print "Found association=$1\n" if $verbose; } shift @xmlfile if @xmlfile; print "current line=$xmlfile[0]" if $debug && @xmlfile; ## get all associated files! ## my @assocrl = (); while (@xmlfile && $xmlfile[0] !~ /<\/tm:assoc>/i) { while (@xmlfile && $xmlfile[0] !~ /<\/tm:assoc>/i && $xmlfile[0] !~ /<tm:assocrl\s/i) { shift @xmlfile; print "current line=$xmlfile[0]" if $debug; } if (@xmlfile && $xmlfile[0] =~ /xlink:href="(.+?)"/i) { push (@assocrl, $1); print "Found assoclink=$1\n" if $verbose; } else { print "No assoclink??\n" if $verbose && @xmlfile; } if (@xmlfile && $xmlfile[0] !~ /<\/tm:assoc>/i){ shift @xmlfile; print "current line=$xmlfile[0]" if $debug; } } if ($assoc && @assocrl) { push (@$associations, [$assoc, \@assocrl]); print "Pussing assoc=$assoc @assocrl\n" if $verbose; } else { print "Was last association\n" if $verbose && !@xmlfile; } } } else { warn "The xmlfile $_[0] cannot be read!\n"; } ## turn associations into HTML ## my @result = (); foreach my $assoc (@$associations) { my $association = ($$associations_index{$$assoc[0]} || ""); print "Assoc: $association$/" if $debug; if ($association =~ /\$.*?\$/) { my @related_topics = @{$$assoc[1]}; my @related_files = (); foreach my $related_topic (@related_topics) { if (exists $$shtmlfiles_index{$related_topic}) { push (@related_files, $$shtmlfiles_index{$related_topic}); } else { push (@related_files, ""); } } $association =~ s/\$0\$/<I>$topicname<\/I>/g; if ($association =~ /\$\*\$/) { print "Substitute \*...$/" if $verbose; while (@related_files) { my $related_file = shift @related_files; my $related_topic = shift @related_topics; my $delim = ","; $delim = " en" if (scalar @related_files == 1); $delim = "" if (scalar @related_files == 0); if ($related_file) { # add link # $related_file =~ s/\/vol\/www\/woc\/web-docs/\/woc/i; $association =~ s/\$\*\$/ <A HREF=\"\/cgi-bin-woc\/topiclink\/link.pl?$related_file+$$assoc[0]\"><I>$related_topic<\/I><\/A>$delim\$\*\$/g; } else { # don't link # $association =~ s/\$\*\$/ <I>$related_topic<\/I>\$\*\$/g; } } print "New Assoc: $association$/" if $verbose; } $association =~ s/\$\*\$//g; my $i = 1; while ($association =~ /\$$i\$/) { my $related_file = shift @related_files; my $related_topic = shift @related_topics; if ($related_file) { # add link # $related_file =~ s/\/vol\/www\/woc\/web-docs/\/woc/i; $association =~ s/\$$i\$/<A HREF=\"\/cgi-bin-woc\/topiclink\/link.pl?$related_file\+$$assoc[0]\"><I>$related_topic<\/I><\/A>/g; } else { # don't link # $association =~ s/\$$i\$/<I>$related_topic<\/I>/g; } $association =~ s/\$$i\$/<I>$topicname<\I>/g; $i++ } push (@result, "$association<BR />\n"); } elsif ($association) { my @related_topics = @{$$assoc[1]}; my @related_files = (); foreach my $related_topic (@related_topics) { if (exists $$shtmlfiles_index{$related_topic}) { push (@related_files, $$shtmlfiles_index{$related_topic}); } else { push (@related_files, ""); } } push (@result, "<I>$topicname</I> $association"); my $related_file = shift @related_files; my $related_topic = shift @related_topics; if ($related_file) { # add link # $related_file =~ s/\/vol\/www\/woc\/web-docs/\/woc/i; push (@result, " <A HREF=\"/cgi-bin-woc/topiclink/link.pl?$related_file\+$$assoc[0]\"><I>$related_topic</I></A>"); } else { # don't link # push (@result, " <I>$related_topic</I>"); } while (@related_files) { my $related_file = shift @related_files; my $related_topic = shift @related_topics; if (@related_files > 0) { if ($related_file) { # add link # $related_file =~ s/\/vol\/www\/woc\/web-docs/\/woc/i; push (@result, ", <A HREF=\"\/cgi-bin-woc\/topiclink\/link.pl?$related_file\+$$assoc[0]\"><I>$related_topic</I></A>"); } else { # don't link # push (@result, ", <I>$related_topic</I>"); } } elsif (@related_files == 0) { if ($related_file) { # add link # $related_file =~ s/\/vol\/www\/woc\/web-docs/\/woc/i; push (@result, " en <A HREF=\"\/cgi-bin-woc\/topiclink\/link.pl?$related_file\+$$assoc[0]\"><I>$related_topic</I></A>"); } else { # don't link # push (@result, " en <I>$related_topic</I>"); } } } push (@result, "<BR />\n"); } } foreach (@result) { print; } } |
From: Egon W. <eg...@us...> - 2002-07-29 09:57:16
|
Update of /cvsroot/woc/woc/cgi-bin/ssi In directory usw-pr-cvs1:/tmp/cvs-serv32298/ssi Log Message: Directory /cvsroot/woc/woc/cgi-bin/ssi added to the repository |
From: Egon W. <eg...@us...> - 2002-07-29 09:56:12
|
Update of /cvsroot/woc/woc/cgi-bin In directory usw-pr-cvs1:/tmp/cvs-serv32002/cgi-bin Log Message: Directory /cvsroot/woc/woc/cgi-bin added to the repository |
From: Egon W. <eg...@us...> - 2002-07-21 15:47:40
|
Update of /cvsroot/woc/woc In directory usw-pr-cvs1:/tmp/cvs-serv18258 Modified Files: .cvsignore Log Message: Ignore build dists. Index: .cvsignore =================================================================== RCS file: /cvsroot/woc/woc/.cvsignore,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -r1.2 -r1.3 *** .cvsignore 20 Jul 2002 09:34:26 -0000 1.2 --- .cvsignore 21 Jul 2002 15:47:38 -0000 1.3 *************** *** 5,6 **** --- 5,7 ---- aclocal.m4 web-docs + *.tar.gz |
From: Egon W. <eg...@us...> - 2002-07-21 15:47:15
|
Update of /cvsroot/woc/woc/bin/lire In directory usw-pr-cvs1:/tmp/cvs-serv17878/bin/lire Added Files: .cvsignore Makefile.am report.pl.in Log Message: Added script using Lire tools to analyze to web log. --- NEW FILE: .cvsignore --- Makefile Makefile.in *.pl --- NEW FILE: Makefile.am --- bin_SCRIPTS = \ report.pl CLEANFILES = $(bin_SCRIPTS) --- NEW FILE: report.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # these parameters can be customized #my $EMAIL = "woc\@sci.kun.nl"; my $EMAIL = "egonw\@sci.kun.nl"; my $TMPLOG = "/tmp/woc/woc.log"; my $WOCLOG = "/tmp/woc/woc"; my $LIRE = "@PATHTOLIRERUN@"; my $LIRE2 = "@PATHTOLIREL2R@"; my $WEESLOG = "/vol/www/wees/httpd/logs"; if ($LIRE eq "") { print "System is lacking a working Lire system\n"; exit(1); } my @reports = (); my @titles = (); push(@reports, "grep woc"); push(@titles, "Complete file"); # BELOW IS NOT WORKING PROPERLY #push(@reports, "grep items/"); #push(@titles, "item/*.shtml only"); #push(@reports, "grep cgi-bin-woc | sed -e 's/\?[^\s\"]*//g'"); #push(@titles, "cgi-bin scripts only"); #push(@reports, "grep translation"); #push(@titles, "translation/ only"); print "Extracting information on /woc server...$/"; `/usr/local/gnu/bin/gunzip -c $WEESLOG/access_log.?.gz 2> /tmp/woc/run.log | grep "woc" | cut -f 1,3- -d' ' > $TMPLOG`; my $titel = "WOC Week Statistieken"; if (-r $TMPLOG) { for (my $i=1; $i<=scalar(@reports); $i++) { print "Compiling report $WOCLOG$i.txt (with $reports[$i-1])...\n"; my $command = "cat $TMPLOG | $reports[$i-1] | $LIRE $LIRE2 www combined > $WOCLOG$i.txt"; print "$command\n"; `$command`; if (-r "$WOCLOG$i.txt") { `cat $WOCLOG$i.txt | /usr/ucb/mail -s '$titel $titles[$i-1]' $EMAIL`; } } `rm $TMPLOG`; `rm $WOCLOG*`; } else { print "Cannot find ${TMPLOG}!"; exit(1); } |
From: Egon W. <eg...@us...> - 2002-07-21 15:47:15
|
Update of /cvsroot/woc/woc/bin/download In directory usw-pr-cvs1:/tmp/cvs-serv17878/bin/download Modified Files: Makefile.in Log Message: Added script using Lire tools to analyze to web log. Index: Makefile.in =================================================================== RCS file: /cvsroot/woc/woc/bin/download/Makefile.in,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -r1.1 -r1.2 *** Makefile.in 20 Jul 2002 14:50:34 -0000 1.1 --- Makefile.in 21 Jul 2002 15:47:12 -0000 1.2 *************** *** 63,66 **** --- 63,69 ---- OBJEXT = @OBJEXT@ PACKAGE = @PACKAGE@ + PATHTOGUNZIP = @PATHTOGUNZIP@ + PATHTOLIREL2R = @PATHTOLIREL2R@ + PATHTOLIRERUN = @PATHTOLIRERUN@ PATHTOPERL = @PATHTOPERL@ PATHTOSH = @PATHTOSH@ |
From: Egon W. <eg...@us...> - 2002-07-21 15:47:15
|
Update of /cvsroot/woc/woc/bin In directory usw-pr-cvs1:/tmp/cvs-serv17878/bin Modified Files: Makefile.am Log Message: Added script using Lire tools to analyze to web log. Index: Makefile.am =================================================================== RCS file: /cvsroot/woc/woc/bin/Makefile.am,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -r1.4 -r1.5 *** Makefile.am 21 Jul 2002 10:20:01 -0000 1.4 --- Makefile.am 21 Jul 2002 15:47:12 -0000 1.5 *************** *** 1,4 **** PREFIX="@prefix@" ! SUBDIRS= cas check download index EXTRA_DIST= --- 1,4 ---- PREFIX="@prefix@" ! SUBDIRS= cas check download index lire EXTRA_DIST= |
From: Egon W. <eg...@us...> - 2002-07-21 15:47:15
|
Update of /cvsroot/woc/woc In directory usw-pr-cvs1:/tmp/cvs-serv17878 Modified Files: configure.in Log Message: Added script using Lire tools to analyze to web log. Index: configure.in =================================================================== RCS file: /cvsroot/woc/woc/configure.in,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -r1.4 -r1.5 *** configure.in 21 Jul 2002 10:20:00 -0000 1.4 --- configure.in 21 Jul 2002 15:47:12 -0000 1.5 *************** *** 8,11 **** --- 8,16 ---- AC_PATH_PROGS(PATHTOPERL, perl, no) AC_PATH_PROGS(PATHTOSH, sh bash, no) + AC_PATH_PROGS(PATHTOGUNZIP, gunzip, no) + + dnl find Lire (http://logreport.org) tools + AC_PATH_PROGS(PATHTOLIRERUN, lr_run, no) + AC_PATH_PROGS(PATHTOLIREL2R, lr_log2report, no) dnl checking for XML tools *************** *** 49,52 **** --- 54,59 ---- bin/index/woclist.pl bin/index/genlangxslts.pl + bin/lire/Makefile + bin/lire/report.pl ]) |
From: Egon W. <eg...@us...> - 2002-07-21 15:33:08
|
Update of /cvsroot/woc/woc/bin/lire In directory usw-pr-cvs1:/tmp/cvs-serv11220/lire Log Message: Directory /cvsroot/woc/woc/bin/lire added to the repository |
From: Egon W. <eg...@us...> - 2002-07-21 15:31:48
|
Update of /cvsroot/woc/woc/bin In directory usw-pr-cvs1:/tmp/cvs-serv10277 Added Files: excluded-gifs excluded-gifs-backup excluded-pdbs Log Message: Added some files with WOC specific info: lists of chemicals for which certain info should *not* be available, and thus should not give an error --- NEW FILE: excluded-gifs --- 7732-18-5 7664-93-9 7783-06-4 75-15-0 7789-20-0 7782-44-7 7647-01-0 71-52-3 506-87-6 10028-15-6 84-81-1 74-88-4 74-87-3 74-83-9 93-58-3 630-08-0 124-38-9 630-08-0 124-38-9 9005-32-7 9016-00-6 75-45-6 75-69-4 7446-09-5 505-60-2 7664-39-3 7727-37-9 --- NEW FILE: excluded-gifs-backup --- 7732-18-5 7664-93-9 7783-06-4 75-15-0 7789-20-0 7782-44-7 7647-01-0 71-52-3 506-87-6 10028-15-6 84-81-1 74-88-4 74-87-3 74-83-9 93-58-3 630-08-0 124-38-9 630-08-0 124-38-9 9005-32-7 9016-00-6 75-45-6 75-69-4 7446-09-5 --- NEW FILE: excluded-pdbs --- 7732-18-5 Vanzelfsprekend (Water) 7789-20-0 Vanzelfsprekend (Zwaar water) 7782-44-7 Vanzelfsprekend (Zuurstof) 144-55-8 Vanzelfsprekend (Zuiveringszout) 7647-01-0 Vanzelfsprekend (Zoutzuur) |
Update of /cvsroot/woc/woc/bin/check In directory usw-pr-cvs1:/tmp/cvs-serv30475/bin/check Added Files: .cvsignore Makefile.am check_filenames.pl.in list-cas.pl.in test-wmls-op-wellformed.pl.in voeg-fysische-gegevens-toe.pl.in voeg-gif-mime-toe.pl.in voeg-pdb-mime-toe.pl.in voeg-woc-nummer-toe.pl.in voeg_stylesheet_toe.pl.in zet-fysisch-props-op-extensie-xml.pl.in zet-wmls-op-ISO-8859-1-encoding.pl.in zoek-cas-bij-pdbs-voor-dadml.pl.in zoek-chems-zonder-cas.pl.in zoek-chems-zonder-fysische-props.pl.in zoek-pdbs-zonder-wml.pl.in zoek-wmls-bij-groepen.pl.in zoek-wmls-zonder-giflink.pl.in zoek-xmls-met-cas-zonder-2dcml.pl.in Log Message: Added scripts from bin/check. --- NEW FILE: .cvsignore --- Makefile Makefile.in *.pl --- NEW FILE: Makefile.am --- bin_SCRIPTS = \ check_filenames.pl \ list-cas.pl \ test-wmls-op-wellformed.pl \ voeg-fysische-gegevens-toe.pl \ voeg-gif-mime-toe.pl \ voeg-pdb-mime-toe.pl \ voeg-woc-nummer-toe.pl \ voeg_stylesheet_toe.pl \ zet-fysisch-props-op-extensie-xml.pl \ zet-wmls-op-ISO-8859-1-encoding.pl \ zoek-cas-bij-pdbs-voor-dadml.pl \ zoek-chems-zonder-cas.pl \ zoek-chems-zonder-fysische-props.pl \ zoek-pdbs-zonder-wml.pl \ zoek-wmls-bij-groepen.pl \ zoek-wmls-zonder-giflink.pl \ zoek-xmls-met-cas-zonder-2dcml.pl CLEANFILES = $(bin_SCRIPTS) --- NEW FILE: check_filenames.pl.in --- #! @PATHTOPERL@ -w use strict; if (scalar(@ARGV) == 0) { print "syntax: check_filenames.pl <*.wml>$/"; exit 0; } my @wmlfiles = @ARGV; my %accepted_codes = (); my $filecount = 0; my $filesparsed = 0; my $codesadded = 0; my $hasid = 0; my $wrongid = 0; foreach my $file (@wmlfiles) { $filecount++; my @content = (); my $changed = 0; my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking $file ($correctcode)...$/"; @content = <OPENFILE>; for (my $i=0; $i < scalar(@content); $i++) { my $line = $content[$i]; if ($line =~ /<ITEM(.*?)>/i) { $filesparsed++; my $args = $1; if ($args =~ /CODE=\"(.*?)\"/) { $hasid++; my $code = $1; $code =~ s/\n//g; print " Code: $code$/"; $accepted_codes{"$code"} = "yes"; if ($code ne $correctcode) { $wrongid++; print " -> it should be: $correctcode$/"; print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $content[$i] = $newline; $changed = 1; } } else { # should correct this file print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $accepted_codes{"$correctcode"} = "yes"; $content[$i] = $newline; $changed = 1; } } } close(OPENFILE); } if ($changed) { if (open(OPENFILE, ">$file")) { $codesadded++; print OPENFILE @content; } else { print "ERROR: file $file could not be saved!"; } } } # now come the process of checking references my $wrong_group = ""; foreach my $file (@wmlfiles) { my @content = (); my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking references in $file ($correctcode)...$/"; @content = <OPENFILE>; foreach my $line (@content) { if ($line =~ /<GROUP>(.*)<\/GROUP>/i) { my $group = $1; print " checking GROUP $group...$/"; if (!($accepted_codes{"$group"})) { print " ERROR: unknown GROUP $group$/"; } } } } } print "----------------------------------------$/"; print " files : $filecount$/"; print " checked : $filesparsed$/"; print " code : $hasid$/"; print " corrected: $wrongid$/"; print " added : $codesadded$/"; print "----------------------------------------$/"; --- NEW FILE: list-cas.pl.in --- #! @PATHTOPERL@ -w # # Make a list of all cas-nummers of which compounds are given # use strict; use diagnostics; my $debug = ""; my $wmldir = "../../data/wml"; opendir (DIR, $wmldir) || die "$wmldir: $!$/"; my @files = grep {/\.xml/i} readdir(DIR); closedir (DIR) || die "$wmldir: $!$/"; foreach my $file (@files) { my $cas = `grep -i CAS-NUMBER $wmldir/$file`; if ($cas) { chomp ($cas); $cas =~ s/\s*<INDEX.*?>\s*//ig; $cas =~ s/\s*<\/INDEX.*?>\s*//ig; print "$cas$/"; } } --- NEW FILE: test-wmls-op-wellformed.pl.in --- #! @PATHTOPERL@ -w use strict; if (! @ARGV) { die "Usage: $0 <xml-files>$/"; } use XML::Parser; my $parser = new XML::Parser(ErrorContext => 2); my $nr_files = @ARGV; my $nr_notexist = 0; my $nr_processed = 0; my $nr_warnings = 0; my $nr_ignored = 0; my $nr_okay = 0; foreach my $file (@ARGV) { if (! (-e $file)) { warn "$file: does not exist!!!$/"; $nr_notexist++; } else { print "Processing $file... "; eval { $parser->parsefile("$file"); }; if ($@) { if ($@ =~ /<sub>/i) { print "Probably found a <SUB> tag inside an attribute, ignoring...$/"; $nr_ignored++; } elsif ($@ =~ /<i>/i) { print "Probably found a <I> tag inside an attribute, ignoring...$/"; $nr_ignored++; } else { warn "$/$@$/"; $nr_warnings++; } } else { print "Okay!$/"; $nr_okay++; } $nr_processed++; } } if ($nr_files) { print " -------------------------$/"; print "Files processed : $nr_files$/"; if ($nr_warnings || $nr_ignored) { print " checked okay : $nr_okay$/"; print " warnings : $nr_warnings$/" if $nr_warnings; print " ignored warnings : $nr_ignored$/" if $nr_ignored; } else { print " all files checked okay!$/"; } print "Non-existing files : $nr_notexist$/" if ($nr_notexist); } else { print "No files to process!$/"; } --- NEW FILE: voeg-fysische-gegevens-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = "yes"; my $wmldir = "../../data/wml"; my $physpropdir = "../../data/dadml/physicalprop/cml"; my $existsfile = "tmp-physprop-exists-"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; opendir (physpropDIR, $physpropdir) || die "$physpropdir: $!$/"; my @physpropfiles = grep {/\.xml/i} readdir(physpropDIR); closedir (physpropDIR) || die "$physpropdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("PhysicalProp"); # Loop over all files foreach my $physpropfile (@physpropfiles) { my $cas = $physpropfile; $cas =~ s/\.xml//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("PhysicalProp" => $cas)); if ($linkmatch) { $nr_already_linked++; print "physprop-link found:\t$linkmatch$/" if ($debug); } else { print "$cas.xml can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { my ($wmlfile) = split (":", $nummatch); print "Going to chage $wmldir/$wmlfile and add $cas...$/" if $debug; if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling physprop-file $cas.xml$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("PhysicalProp"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @physpropfiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : ", $nr_skipped_links,$/ if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; } else { print " Already linked matches : $nr_already_linked$/"; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingphysprops = @physpropfiles - $nr_foundCAS; print " Dangling files found : $nr_danglingphysprops$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/<\/ITEM/i) { push (@XML_data, " <INSERT MIME=\"chemical/cml\" CLASS=\"PhysicalProperties\">$cas.xml</INSERT>$/"); push (@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } print " about to store changed file...$/" if $debug; if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "INSERT-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-gif-mime-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $gifdir = "../../data/dadml/2d/gif"; my $existsfile = "tmp-gifmime-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; my $nr_dadmlpath_added = 0; opendir (GIFDIR, $gifdir) || die "$gifdir: $!$/"; my @giffiles = grep {/\.gif/i} readdir(GIFDIR); closedir (GIFDIR) || die "$gifdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("gif"); # Loop over all files foreach my $giffile (@giffiles) { my $cas = $giffile; $cas =~ s/\.gif//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { my ($wmlfile) = split (":", $nummatch); $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("gif" => $cas)); if ($linkmatch) { #print "$cas.gif already linked!!:\t$nummatch$/"; $nr_already_linked++; print "GIF-link found:\t$linkmatch$/" if ($debug); if ($linkmatch !~ /PATH/i) { #print "\tPATH-attribuut toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { &add_dadml_path ("$wmldir/$wmlfile", $cas); $nr_dadmlpath_added++; #} } } else { print "$cas.gif can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling GIF-file $cas.gif$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("gif"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @giffiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : $nr_skipped_links$/" if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; } else { print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingGIFs = @giffiles - $nr_foundCAS; print " Dangling files found : $nr_danglingGIFs$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; my $has_WOC = `grep -i "<WOC>" $file`; my $has_DICT = `grep -i "<DICT>" $file` if (! $has_WOC); my $has_INDEX = `grep -i "<INDEX" $file` if (! $has_WOC); my $last_was_INDEX = ""; my $indent = ""; if (open (FILE, $file)) { if ($has_WOC) { while (<FILE>) { if (/^(\s*)<WOC>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } elsif ($has_INDEX) { while (<FILE>) { if (/^(\s*)<INDEX/i) { $indent = $1; push (@XML_data, $_); $last_was_INDEX = "True"; } elsif ($last_was_INDEX && ! /<INDEX/i) { push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); push (@XML_data, $_); $changed = "True"; $last_was_INDEX = ""; } else { push (@XML_data, $_); } } } elsif ($has_DICT) { while (<FILE>) { if (/^(\s*)<\/DICT>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } else { while (<FILE>) { if (/^(\s*)<ITEM/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "MEDIA-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } sub add_dadml_path { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/MEDIA.*image\/gif.*$cas/i) { s/MIME=\"image\/gif\">/MIME=\"image\/gif\" PATH=\"dadml\">/ig; push (@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "PATH-attribute added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-pdb-mime-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $pdbdir = "../../data/dadml/3d/pdb"; my $existsfile = "tmp-pdbmime-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; opendir (PDBDIR, $pdbdir) || die "$pdbdir: $!$/"; my @pdbfiles = grep {/\.pdb/i} readdir(PDBDIR); closedir (PDBDIR) || die "$pdbdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("x-pdb"); # Loop over all files foreach my $pdbfile (@pdbfiles) { my $cas = $pdbfile; $cas =~ s/\.pdb//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("x-pdb" => $cas)); if ($linkmatch) { $nr_already_linked++; print "PDB-link found:\t$linkmatch$/" if ($debug); } else { print "$cas.pdb can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { my ($wmlfile) = split (":", $nummatch); if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling PDB-file $cas.pdb$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("x-pdb"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @pdbfiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : ", $nr_skipped_links,$/ if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; } else { print " Already linked matches : $nr_already_linked$/"; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingPDBs = @pdbfiles - $nr_foundCAS; print " Dangling files found : $nr_danglingPDBs$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; my $has_WOC = `grep -i "<WOC>" $file`; my $has_DICT = `grep -i "<DICT>" $file` if (! $has_WOC); my $has_INDEX = `grep -i "<INDEX" $file` if (! $has_WOC); my $last_was_INDEX = ""; my $indent = ""; if (open (FILE, $file)) { if ($has_WOC) { while (<FILE>) { if (/^(\s*)<WOC>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } elsif ($has_INDEX) { while (<FILE>) { if (/^(\s*)<INDEX/i) { $indent = $1; push (@XML_data, $_); $last_was_INDEX = "True"; } elsif ($last_was_INDEX && ! /<INDEX/i) { push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); push (@XML_data, $_); $changed = "True"; $last_was_INDEX = ""; } else { push (@XML_data, $_); } } } elsif ($has_DICT) { while (<FILE>) { if (/^(\s*)<\/DICT>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } else { while (<FILE>) { if (/^(\s*)<ITEM/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "MEDIA-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-woc-nummer-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $wmldir = "../../data/wml"; my @item_start_elements = `grep -i "ITEM" $wmldir/*.xml | grep -i "ID="`; # make a list of current ID's my @idlist; foreach my $elem (@item_start_elements) { $elem =~ /ID=\"WOC(.*?)\".*/i; #print $1 . "$/"; push (@idlist, $1); } # sort thislist @idlist = sort @idlist; # what is the highest ID? my $highestid = $idlist[-1]; #print "Highest in use: WOC" . $highestid ."$/"; my $nextfree = sprintf "%08i", $highestid + 1; print "First free: WOC" . $nextfree ."$/"; # next step is to browse trough all files and add numbers my @wmlfiles = <$wmldir/*.xml>; foreach my $file (@wmlfiles) { my $mustbeupdated = 0; if (open (OPENFILE, $file)) { while ( <OPENFILE> ) { if (/<ITEM(.*?)>/) { # oke start element ITEM found if ($1 =~ /ID=\"WOC(.*?)\".*/) { # oke, has ID at this moment... print it print "ID found in $file: $1$/"; } else { # oke, no ID at this moment... add it... $mustbeupdated = 1; } } } } if ($mustbeupdated) { print "Updating $file...$/"; my @inputfile = (); if (open (OPENFILE, $file)) { while (<OPENFILE> ) { if (/(<ITEM\ )(.*>)/) { $_ = "$1ID=\"WOC$nextfree\" $2$/"; # raise next free ID $nextfree = sprintf "%08i", $nextfree + 1; } push (@inputfile, $_); } if (open (OPENFILE, ">$file")) { print OPENFILE @inputfile; } } } } --- NEW FILE: voeg_stylesheet_toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $dir = "../../data/dadml/fysisch/cml"; # next step is to browse trough all files and add numbers my @files = <$dir/*.xml>; my $insert = "<\?xml-stylesheet type=\"text\/xsl\" href=\"http:\/\/www.sci.kun.nl\/woc\/data\/dadml\/fysisch\/cml\/cml.xsl\" \?>\n"; foreach my $file (@files) { my $mustbeupdated = 1; if (open (OPENFILE, $file)) { while ( <OPENFILE> ) { if (/<\?xml-stylesheet/) { $mustbeupdated = 0; } } } if ($mustbeupdated) { print "Updating $file...$/"; my @inputfile = (); if (open (OPENFILE, $file)) { while (<OPENFILE> ) { if (/<molecule/i) { push (@inputfile, $insert); } push (@inputfile, $_); } if (open (OPENFILE, ">$file")) { print OPENFILE @inputfile; } } } } --- NEW FILE: zet-fysisch-props-op-extensie-xml.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $nr_changed = 0; ### de lange versie van opendir ### #opendir (WMLDIR, $wmldir) || die "WMLDIR: $!$/"; #my @wmlfiles = grep {/\.xml/i} readdir(WMLDIR); #closedir (WMLDIR) || die "$wmldir: $!$/"; ### de korte versie van opendir ### #my @wmlfiles = <$wmldir/*.xml>; ### Loop over all files ### # met een foreach over @ARGV kun je het script aanroepen met ../wml/*.xml # dit wordt door de shell al uitgevouwen tot ../wml/azijnzuur.xml etc.. foreach my $wmlfile (@ARGV) { my @file = (); my $change_needed = ""; print "Reading $wmlfile... "; open (FILE, "<$wmlfile") || die "$wmlfile: $!$/"; while (<FILE>) { my $line = $_; if ($line =~ m/<INSERT.*?>(.*?)\.cml<\/INSERT>/) { my $casnummer = $1; $change_needed = "yes"; print "new extension necessary ($casnummer, $line)..."; push (@file, " <INSERT MIME=\"chemical/cml\" CLASS=\"PhysicalProperties\">$casnummer.xml<\/INSERT>$/"); } else { push (@file, $line); } } close(FILE); if ($change_needed eq "yes") { open (OUT, ">$wmldir/$wmlfile") || die "File $wmlfile not writeable!$/"; foreach my $line (@file) { print OUT $line; } close(OUT); print "Changed!$/"; } else { print "Nothing done!$/"; } } print "-----------------$/"; print "Files changed: $nr_changed$/"; --- NEW FILE: zet-wmls-op-ISO-8859-1-encoding.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $nr_changed = 0; ### de lange versie van opendir ### #opendir (WMLDIR, $wmldir) || die "WMLDIR: $!$/"; #my @wmlfiles = grep {/\.xml/i} readdir(WMLDIR); #closedir (WMLDIR) || die "$wmldir: $!$/"; ### de korte versie van opendir ### #my @wmlfiles = <$wmldir/*.xml>; ### Loop over all files ### # met een foreach over @ARGV kun je het script aanroepen met ../wml/*.xml # dit wordt door de shell al uitgevouwen tot ../wml/azijnzuur.xml etc.. foreach my $wmlfile (@ARGV) { my @file = (); my $change_needed = ""; print "Reading $wmlfile... "; if (open (FILE, "<$wmlfile")) { while (<FILE>) { my $line = $_; if (($line =~ m/<\?xml.*?\?>/) && ($line !~ m/ISO-8859-1/)) { $change_needed = "yes"; print "new encoding necessary... "; unshift (@file, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>$/"); # geen push maar een unshift, om deze regel pertinent als eerste te krijgen # } else { push (@file, $line); } } close(FILE); } else { warn "$wmlfile: $!$/"; } if ($change_needed eq "yes") { if (open (OUT, ">$wmldir/$wmlfile")) { foreach my $line (@file) { print OUT $line; } close(OUT); print "Changed!$/"; $nr_changed++; } else { warn "File $wmlfile not writeable!$/"; } } else { print "Nothing done!$/"; } } print "-----------------$/"; print "Files changed: $nr_changed$/"; --- NEW FILE: zoek-cas-bij-pdbs-voor-dadml.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $pdbdir = "../../data/compounds/pdb/new"; my $existsfile = "tmp-find-cas-exists"; opendir (PDBDIR, $pdbdir) || die "$pdbdir: $!$/"; my @pdbfiles = grep {/\.pdb/i} readdir(PDBDIR); closedir (PDBDIR) || die "$pdbdir: $!$/"; &create_existskeyfile ("LANG"); foreach my $pdbfile (@pdbfiles) { $pdbfile =~ s/\.pdb//ig; $pdbfile =~ tr/A-Z/a-z/; my $namematch = &exists (("LANG" => $pdbfile)); if ($namematch) { print "NAME match for $pdbfile.pdb:\t$namematch$/" if $debug; my ($wmlfile) = split (":", $namematch); $wmlfile =~ s/^\s*//i; print "WMLFILE:$wmlfile.$/" if $debug; my $cas = `grep -i CAS-NUMBER $wmldir/$wmlfile`; if ($cas) { chomp ($cas); $cas =~ s/\s*<INDEX.*?>\s*//ig; $cas =~ s/\s*<\/INDEX.*?>\s*//ig; print "CAS number $cas\tfound for $pdbfile.pdb\tin $wmlfile$/"; } else { print "no CAS number found for $pdbfile.pdb in $wmlfile$/" if $debug; } } else { print "Dangling PDB-file $pdbfile.pdb$/" if $debug; } } &remove_existskeyfile ("LANG"); sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } --- NEW FILE: zoek-chems-zonder-cas.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $nr_checked = 0; my $nr_chemicals = 0; my $nr_groups = 0; my $nr_noCAS = 0; # Loop over all files foreach my $arg (@ARGV) { &look_for_CAS_in_file ($arg); } # Print statistics print " ------------------------$/"; print "Files checked : $nr_checked$/"; if ($nr_chemicals) { print "Chemicals found : $nr_chemicals$/"; print "Chemical groups found : $nr_groups$/" if ($nr_groups); if ($nr_noCAS) { print "Chemicals without CAS : $nr_noCAS$/"; } else { print "All chemicals had CAS!$/"; } } else { print "No Chemicals found.$/"; } sub look_for_CAS_in_file { my $file = shift; my $changed = ""; print "Processing $file\t"; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; my $group = ""; while (<FILE>) { if (/CHEMICAL/i) { print "CHEMICAL! " if (!$chemical); # 1 hit is enough # $chemical = "True"; $nr_chemicals++; } elsif (/CLASS="GROUP"/i) { print "GROUP! " if (!$group); # 1 hit is enough # $group = "True"; $nr_groups++; } elsif (/(CAS|WOC)-NUMBER/i && /\>(.+?)\</) { my $type = $1; print "$type number found...$/" if ($debug && ! $cas_found); # 1 hit is enough # $cas_found = "True"; } } if ($chemical && !$group && !$cas_found) { $nr_noCAS++; print "No CAS number found...$/"; } else { print "$/" unless $debug; } close (FILE) || warn "$file: $!$/"; $nr_checked++; } else { warn "$file: $!$/"; } } --- NEW FILE: zoek-chems-zonder-fysische-props.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $nr_checked = 0; my $nr_chemicals = 0; my $nr_groups = 0; my $nr_noCAS = 0; my $nr_noPP = 0; # Loop over all files foreach my $arg (@ARGV) { &look_for_CAS_in_file ($arg); } # Print statistics #print " ------------------------$/"; #print "Files checked : $nr_checked$/"; #if ($nr_chemicals) { # print "Chemicals found : $nr_chemicals$/"; # print "Chemical groups found : $nr_groups$/" if ($nr_groups); # if ($nr_noPP) { # print "Chemicals without Physical Properties : $nr_noPP$/"; # } else { # print "All chemicals had CAS!$/"; # } #} else { # print "No Chemicals found.$/"; #} sub look_for_CAS_in_file { my $file = shift; my $changed = ""; #print "Processing $file\t"; if (open (FILE, $file)) { my $cas_found = ""; my $casno = "00-00-0"; my $chemical = ""; my $group = ""; while (<FILE>) { if (/CHEMICAL/i) { # print "CHEMICAL! " if (!$chemical); # 1 hit is enough # $chemical = "True"; $nr_chemicals++; } elsif (/CLASS="GROUP"/i) { $group = "True"; $nr_groups++; } elsif (/(CAS|WOC)-NUMBER/i && /\>(.+?)\</) { my $type = $1; if (/\>(.+?)\</) { $casno = $1; }; $cas_found = "True"; } } if ($chemical && $cas_found && (!-e "/vol/www/woc/web-docs/data/dadml/fysisch/cml/$casno.xml")) { $nr_noPP++; #print "No physical properties found for ($casno)!$/"; print "$casno$/"; } else { #print "$/" unless $debug; } close (FILE) || warn "$file: $!$/"; $nr_checked++; } else { warn "$file: $!$/"; } } --- NEW FILE: zoek-pdbs-zonder-wml.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $pdbdir = "../../data/dadml/3d/pdb"; my $existsfile = "tmp-nowml-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; opendir (PDBDIR, $pdbdir) || die "$pdbdir: $!$/"; my @pdbfiles = grep {/\.pdb/i} readdir(PDBDIR); closedir (PDBDIR) || die "$pdbdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("x-pdb"); # Loop over all files foreach my $pdbfile (@pdbfiles) { $pdbfile =~ s/\.pdb//ig; my $nummatch = &exists (("NUMBER" => $pdbfile)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("x-pdb" => $pdbfile)); if ($linkmatch) { $nr_already_linked++; print "PDB-link found:\t$linkmatch$/" if ($debug); } else { print "$pdbfile.pdb can be linked!!:\t$nummatch$/"; } } else { print "Dangling PDB-file $pdbfile.pdb$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("x-pdb"); # Print statistics print " ------------------------$/"; print "Files checked : ",scalar @pdbfiles, $/; if ($nr_foundCAS) { print "CAS or WOC number found : $nr_foundCAS$/"; print "Already linked matches : $nr_already_linked$/"; print "Matches linkable : ", $nr_foundCAS - $nr_already_linked,$/; } else { print "Not one file name matched an item name...$/"; } sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } --- NEW FILE: zoek-wmls-bij-groepen.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $existsfile = "tmp-groupclass-exists"; my %groupitems; my @files = <$wmldir/*.xml>; foreach my $file (@files) { if (open (OPENFILE, $file)) { my $itemname; my @groupnames; while ( <OPENFILE> ) { if (/<ITEM NAME=\"(.*?)\" CLASS=\"GROUP\">/) { $itemname = $1; push (@groupnames, $1); } elsif (/<ITEM NAME=\"(.*?)\".*>/) { $itemname = $1; } elsif (m/<GROUP.*?>(.*)<\/GROUP>/i) { push (@groupnames, $1); #is er een groep? # zo ja, voeg $itemname toe aan array in hash } } foreach my $group (@groupnames) { if (exists $groupitems{$group}) { $groupitems{$group} = [@{$groupitems{$group}}, $itemname]; } else { $groupitems{$group} = [$itemname]; } } } #close (OPENFILE); } my %groups = %groupitems; foreach my $group (keys %groupitems) { foreach my $item (@{$groupitems{$group}}) { if (exists $groupitems{$item} && $group ne $item) { delete $groups{$item}; } } } if ($debug) { foreach my $group (keys %groups) { printBoom(0, $group, \%groupitems); } exit; } &create_existskeyfile ("ITEM"); foreach my $group (keys %groups) { my $file = $group; $file =~ tr/[A-Z]/[a-z]/; $file =~ s/\'//g; $file =~ s/\s+//g; $file =~ s/\-//g; $file =~ s/\&//g; $file =~ s/\;//g; $file =~ s/\,//g; if (! -e "$wmldir/$file.xml") { my $itemmatch = &exists (("ITEM" => $group)); if ($itemmatch) { ($file) = split (":", $itemmatch); if ($itemmatch =~ /CLASS=\"GROUP\"/i) { print "Groupfile $file for group $group exists...$/" if $debug; } else { print "Found groupfile $file for group $group...$/"; } } else { print "WARN: cannot find groupfile for group $group!!!$/"; } } #&check_for_class_presence ($file, $key); } &remove_existskeyfile ("ITEM"); sub printBoom { my ($indent, $key, $boom) = @_; print " "x$indent,"Group ($key):\n"; foreach my $item (@{$$boom{$key}}) { #print " "x$indent," $item\n"; if (exists $$boom{$item} && $key ne $item) { printBoom($indent+2,$item, $boom); } } } sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -h -i "$filters{$key}" $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } --- NEW FILE: zoek-wmls-zonder-giflink.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $datadir = "../../data/wml"; my $gifgalarydir = "../../data/compounds/gifgalary"; my $excludefile = "excluded-gifs"; my @excludes = (); open (FILE, $excludefile) || die "$excludefile: $!$/"; while (<FILE>) { chomp; s/^\s*//; ($_) = split ("\s+", $_); push (@excludes, $_); } `grep -L MIXTURE $datadir/*.xml > tmp-no-mix`; `grep -L POLYMER \`cat tmp-no-mix\` > tmp-no-pol`; `grep -L image/gif \`cat tmp-no-pol\` > tmp-no-img`; `grep -i CAS-NUMBER \`cat tmp-no-img\` > tmp-no-gif`; `rm tmp-no-mix > /dev/null`; `rm tmp-no-pol > /dev/null`; `rm tmp-no-img > /dev/null`; open (FILE, "tmp-no-gif") || die "tmp-no-gif: $!$/"; while (<FILE>) { chomp; s/$datadir\///ig; s/\s*<INDEX.*?>\s*//ig; s/\s*<\/INDEX.*?>\s*/.gif/ig; if (exclude_item ($_)) { print "Excluding item $_...$/" if $debug; } else { my @file = split (":", $_); printf "%40s\t%s$/", @file if (!(-e "$gifgalarydir/".$file[1])); } } `rm tmp-no-gif > /dev/null`; sub exclude_item { my $item = shift; foreach my $cas (@excludes) { if ($item =~ /$cas/i) { print "Match $item found!$/" if $debug; return "$item"; } } return ""; } --- NEW FILE: zoek-xmls-met-cas-zonder-2dcml.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $datadir = "../../data/wml"; my $cmldir = "../../data/dadml/2d/cml"; `grep -i CAS-NUMBER $datadir/*.xml > tmp-has-cas`; `grep -i SMILES $datadir/*.xml > tmp-has-smiles`; open (FILE, "tmp-has-cas") || die "tmp-has-cas: $!$/"; while (<FILE>) { chomp; s/\s*<INDEX.*?>\s*//ig; s/\s*<\/INDEX.*?>\s*//ig; my @file = split (":", $_); print "NA: $file[1] \t$file[0]$/" if (!(-e "$cmldir/".$file[1].".cml")); my @smiles = split(":", `grep $file[0] tmp-has-smiles`); print $smiles[1] if (@smiles); } `rm tmp-has-cas > /dev/null`; `rm tmp-has-smiles > /dev/null`; |
From: Egon W. <eg...@us...> - 2002-07-21 10:20:04
|
Update of /cvsroot/woc/woc In directory usw-pr-cvs1:/tmp/cvs-serv30475 Modified Files: configure.in Log Message: Added scripts from bin/check. Index: configure.in =================================================================== RCS file: /cvsroot/woc/woc/configure.in,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -r1.3 -r1.4 *** configure.in 20 Jul 2002 15:17:14 -0000 1.3 --- configure.in 21 Jul 2002 10:20:00 -0000 1.4 *************** *** 23,26 **** --- 23,44 ---- bin/cas/check_doublenumbers.pl bin/cas/zoek-UNs-zonder-wml.pl + bin/check/Makefile + bin/check/check_filenames.pl + bin/check/list-cas.pl + bin/check/test-wmls-op-wellformed.pl + bin/check/voeg-fysische-gegevens-toe.pl + bin/check/voeg-gif-mime-toe.pl + bin/check/voeg-pdb-mime-toe.pl + bin/check/voeg-woc-nummer-toe.pl + bin/check/voeg_stylesheet_toe.pl + bin/check/zet-fysisch-props-op-extensie-xml.pl + bin/check/zet-wmls-op-ISO-8859-1-encoding.pl + bin/check/zoek-cas-bij-pdbs-voor-dadml.pl + bin/check/zoek-chems-zonder-cas.pl + bin/check/zoek-chems-zonder-fysische-props.pl + bin/check/zoek-pdbs-zonder-wml.pl + bin/check/zoek-wmls-bij-groepen.pl + bin/check/zoek-wmls-zonder-giflink.pl + bin/check/zoek-xmls-met-cas-zonder-2dcml.pl bin/download/Makefile bin/download/download_pdb_from_NCI.pl |
From: Egon W. <eg...@us...> - 2002-07-21 10:20:04
|
Update of /cvsroot/woc/woc/bin In directory usw-pr-cvs1:/tmp/cvs-serv30475/bin Modified Files: Makefile.am Log Message: Added scripts from bin/check. Index: Makefile.am =================================================================== RCS file: /cvsroot/woc/woc/bin/Makefile.am,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -r1.3 -r1.4 *** Makefile.am 20 Jul 2002 15:17:14 -0000 1.3 --- Makefile.am 21 Jul 2002 10:20:01 -0000 1.4 *************** *** 1,4 **** PREFIX="@prefix@" ! SUBDIRS= cas download index EXTRA_DIST= --- 1,4 ---- PREFIX="@prefix@" ! SUBDIRS= cas check download index EXTRA_DIST= |
From: Egon W. <eg...@us...> - 2002-07-21 09:54:58
|
Update of /cvsroot/woc/woc/bin/check In directory usw-pr-cvs1:/tmp/cvs-serv23488/check Log Message: Directory /cvsroot/woc/woc/bin/check added to the repository |
From: Egon W. <eg...@us...> - 2002-07-20 15:17:16
|
Update of /cvsroot/woc/woc/bin In directory usw-pr-cvs1:/tmp/cvs-serv2361/bin Modified Files: Makefile.am Log Message: A few things I forgot to commit. Index: Makefile.am =================================================================== RCS file: /cvsroot/woc/woc/bin/Makefile.am,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -r1.2 -r1.3 *** Makefile.am 20 Jul 2002 14:50:34 -0000 1.2 --- Makefile.am 20 Jul 2002 15:17:14 -0000 1.3 *************** *** 1,4 **** PREFIX="@prefix@" ! SUBDIRS= index download EXTRA_DIST= --- 1,4 ---- PREFIX="@prefix@" ! SUBDIRS= cas download index EXTRA_DIST= |
From: Egon W. <eg...@us...> - 2002-07-20 15:17:16
|
Update of /cvsroot/woc/woc/bin/download In directory usw-pr-cvs1:/tmp/cvs-serv2361/bin/download Added Files: .cvsignore Log Message: A few things I forgot to commit. --- NEW FILE: .cvsignore --- Makefile Makefile.in *.pl |
From: Egon W. <eg...@us...> - 2002-07-20 15:17:16
|
Update of /cvsroot/woc/woc In directory usw-pr-cvs1:/tmp/cvs-serv2361 Modified Files: configure.in Log Message: A few things I forgot to commit. Index: configure.in =================================================================== RCS file: /cvsroot/woc/woc/configure.in,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -r1.2 -r1.3 *** configure.in 20 Jul 2002 14:25:32 -0000 1.2 --- configure.in 20 Jul 2002 15:17:14 -0000 1.3 *************** *** 15,18 **** --- 15,29 ---- Makefile bin/Makefile + bin/cas/Makefile + bin/cas/add_nsc_number.pl + bin/cas/add_numbers.pl + bin/cas/add_smiles.pl + bin/cas/add_un_nummers.pl + bin/cas/check_cas_nummers.pl + bin/cas/check_doublenumbers.pl + bin/cas/zoek-UNs-zonder-wml.pl + bin/download/Makefile + bin/download/download_pdb_from_NCI.pl + bin/download/download_xyz_from_NCI.pl bin/index/Makefile bin/index/woclist.sh |
From: Egon W. <eg...@us...> - 2002-07-20 15:14:33
|
Update of /cvsroot/woc/woc/bin/cas In directory usw-pr-cvs1:/tmp/cvs-serv1745 Added Files: .cvsignore CAS2NSC.txt CAS2SMILES.txt CAS2UN.txt Makefile.am add_nsc_number.pl.in add_numbers.pl.in add_smiles.pl.in add_un_nummers.pl.in check_cas_nummers.pl.in check_doublenumbers.pl.in zoek-UNs-zonder-wml.pl.in Log Message: Added scripts checking and adding indices to WML files. --- NEW FILE: .cvsignore --- Makefile Makefile.in *.pl --- NEW FILE: CAS2NSC.txt --- 100-00-5 9792 100-01-6 9797 100-02-7 1317 100-06-1 5601 100-06-1 209523 100-07-2 86125 100-09-4 7926 100-09-4 32742 100-10-7 5517 100-11-8 4609 100-12-9 858 100-14-1 9803 100-15-2 5390 100-16-3 4079 100-17-4 5507 100-18-5 84198 100-19-6 41590 100-20-9 41885 100-21-0 36973 [...126750 lines suppressed...] 998-29-8 96837 998-30-1 124134 998-40-3 91700 998-41-4 111642 998-91-4 2321 998-91-4 224119 998-98-1 79255 999-21-3 4799 999-23-5 6406 999-29-1 58406 999-30-4 16588 999-33-7 223056 999-37-1 509380 999-47-3 34835 999-50-8 58807 999-55-3 20945 999-68-8 93208 999-79-1 525131 999-81-5 34858 999-97-3 93895 --- NEW FILE: CAS2SMILES.txt --- 100-00-5 [N+]([O-])(=O)C1=CC=C(Cl)C=C1 100-01-6 [N+]([O-])(=O)C1=CC=C(N)C=C1 100-02-7 [N+]([O-])(=O)C1=CC=C(O)C=C1 100-06-1 C(C)(=O)C1=CC=C(OC)C=C1 100-09-4 C(=O)(O)C1=CC=C(OC)C=C1 100-10-7 N(C)(C)C1=CC=C(C=O)C=C1 100-11-8 [N+]([O-])(=O)C1=CC=C(CBr)C=C1 100-12-9 [N+]([O-])(=O)C1=CC=C(CC)C=C1 100-14-1 [N+]([O-])(=O)C1=CC=C(CCl)C=C1 100-15-2 [N+]([O-])(=O)C1=CC=C(NC)C=C1 100-16-3 [N+]([O-])(=O)C1=CC=C(NN)C=C1 100-17-4 [N+]([O-])(=O)C1=CC=C(OC)C=C1 100-23-2 N(C)(C)C1=CC=C([N+](=O)[O-])C=C1 100-25-4 [N+]([O-])(=O)C1=CC=C([N+](=O)[O-])C=C1 100-26-5 C(=O)(O)C1=CC=C(C(=O)O)N=C1 100-28-7 [N+]([O-])(=O)C1=CC=C(N=C=O)C=C1 100-29-8 [N+]([O-])(=O)C1=CC=C(OCC)C=C1 100-32-3 [N+]([O-])(=O)C2=CC=C(SSC1=CC=C([N+](=O)[O-])C=C1)C=C2 100-33-4 C(=N)(N)C2=CC=C(OCCCCCOC1=CC=C(C(=N)N)C=C1)C=C2 [...9537 lines suppressed...] 99-87-6 C(C)(C)C1=CC=C(C)C=C1 99-88-7 C(C)(C)C1=CC=C(N)C=C1 99-89-8 C(C)(C)C1=CC=C(O)C=C1 99-91-2 C(C)(=O)C1=CC=C(Cl)C=C1 99-92-3 C(C)(=O)C1=CC=C(N)C=C1 99-93-4 C(C)(=O)C1=CC=C(O)C=C1 99-94-5 C(=O)(O)C1=CC=C(C)C=C1 99-96-7 C(=O)(O)C1=CC=C(O)C=C1 99-97-8 N(C)(C)C1=CC=C(C)C=C1 99-98-9 N(C)(C)C1=CC=C(N)C=C1 99-99-0 [N+]([O-])(=O)C1=CC=C(C)C=C1 992-59-6 S(=O)(=O)(O)C1=C6C(=C(N)C(=C1)N=NC5=C(C)C=C(C4=CC(=C(N=NC3=C(N)C2=C(C=CC=C2)C(=C3)S(=O)(=O)O)C=C4)C)C=C5)C=CC=C6 994-31-0 [Sn](CC)(CC)(CC)Cl 996-19-0 S(=O)(=O)(O)O.C(NN)(=N)N 996-97-4 N(C(CCCCCCC)=O)(CC)CC 996-98-5 C(C(NN)=O)(NN)=O 997-95-5 N(CC(C)C)(CC(C)C)N=O 998-91-4 C(=CC(OCC)=O)(OCC)C 999-21-3 C(C=CC(OCC=C)=O)(OCC=C)=O 999-23-5 N(CCN(CCC#N)C)(CCC#N)C --- NEW FILE: CAS2UN.txt --- #CAS-nummer <=> UN-nummer 100-00-5 1578 100-01-6 1661 100-02-7 1663 100-25-4 1597 100-37-8 2686 100-39-0 1737 100-40-3 1993 100-41-4 1175 100-42-5 2055 100-44-7 1738 100-47-0 2224 100-51-6 2810 100-52-7 1990 100-61-8 2294 100-63-0 2572 100-80-1 2618 100-97-0 1328 10024-97-2 1070 10025-67-9 1828 10025-69-1 2923 10025-78-2 1295 10025-87-3 1810 10025-91-9 1733 10026-04-7 1818 10026-13-8 1806 10031-13-7 1618 10035-10-6 1048 10045-94-0 1625 10048-95-0 1685 101-02-0 2811 101-68-8 2489 101-72-4 2811 101-77-9 2651 101-84-8 3077 10102-18-8 2630 10102-44-0 1067 10103-50-1 1622 10103-61-4 1557 10108-64-2 2570 10112-91-1 2025 10124-50-2 1678 10192-30-0 2693 102-82-9 2542 10265-92-6 2783 10290-12-7 1586 10294-33-4 2692 10294-34-5 1741 10326-27-9 1564 10361-37-2 1564 104-15-4 2585 104-94-9 02431 105-46-4 1123 105-58-8 2366 105-74-8 2124, 106-35-4 1224 106-42-3 1307 106-44-5 2076 106-46-7 2811 106-47-8 2018 106-48-9 2020 106-49-0 1708 106-50-3 1673 106-51-4 2587 106-87-6 2810 106-88-7 3022 106-89-8 2023 106-92-3 2219 106-93-4 1605 106-97-8 1011 106-98-9 1012 106-99-0 1010 107-01-7 1012 107-02-8 1092 107-05-1 1100 107-06-2 1184 107-07-3 1135 107-10-8 1277 107-11-9 2334 107-12-0 2404 107-13-1 1093 107-14-2 2668 107-15-3 1604 107-18-6 1098 107-19-7 2929 107-20-0 2232 107-30-2 1239 107-31-3 1243 107-39-1 2050 107-40-4 2050 107-49-3 3018 107-83-5 1208 107-87-9 1249 107-98-2 3092 108-01-0 2051 108-03-2 2608 108-05-4 1301 108-10-1 1245 108-11-2 2053 108-18-9 1158 108-20-3 1159 108-21-4 1220 108-23-6 2407 108-24-7 1715 108-31-6 2215 108-38-3 1307 108-39-4 2076 108-42-9 2019 108-43-0 2020 108-44-1 1708 108-46-3 2876 108-65-6 1993 108-67-8 2325 108-70-3 2321 108-77-0 2670 108-83-8 1157 108-87-2 2296 108-88-3 1294 108-89-4 2313 108-90-7 1134 108-91-8 2357 108-94-1 1915 108-95-2 1671 108-98-5 2337 108-99-6 2313 109-06-8 2313 109-55-7 2734 109-60-4 1276 109-66-0 1265 109-69-3 1127 109-73-9 1125 109-79-5 2347 109-86-4 1188 109-87-5 1234 109-89-7 1154 109-92-2 1302 109-94-4 1190 109-99-9 2056 110-00-9 2389 110-01-0 2412 110-02-1 2414 110-12-3 2302 110-16-7 1759 110-19-0 1213 110-43-0 1110 110-49-6 1189 110-54-3 1208 110-63-4 2810 110-80-5 1171 110-82-7 1145 110-83-8 2256 110-86-1 1282 110-89-4 2401 110-91-8 2054 11097-69-1 2315 111-14-8 3265 111-15-9 1172 111-30-8 2810 111-40-0 2079 111-65-9 1262 111-66-0 1993 111-69-3 2205 111-76-2 2369 111-84-2 1920 1113-38-8 2811 112-24-3 2259 112-55-0 1228 114-26-1 2757 115-11-7 1055 115-29-7 2761 115-32-2 2761 116-06-3 2757 116-29-0 2761 118-74-1 2729 118-75-2 2588 118-96-7 0209 119-93-7 2811 120-36-5 2765 120-51-4 2810 120-71-8 2431 120-80-9 2811 120-82-1 2321 120-83-2 2020 120-92-3 2245 12001-29-5 2590 12057-74-8 2011 12079-65-1 2811 121-14-2 2038 121-43-7 2416 121-44-8 1296 121-69-7 2253 121-75-5 3082 12108-13-3 2810 12122-67-7 2771 12125-01-8 2505 12136-45-7 2033 122-14-5 3018 122-34-9 2763 122-39-4 2811 122-52-1 2323 122-79-2 2810 123-05-7 1191 123-31-9 2662 123-38-6 1275 123-51-3 1105 123-54-6 2310 123-62-6 2496 123-72-8 1129 123-77-3 3242 123-86-4 1123 123-91-1 1165 123-92-2 1104 124-02-7 2359 124-09-4 2280 124-18-5 2247 124-38-9 1013 124-40-3 1032 124-41-4 1431 124-58-3 1557 124-63-0 3246 12427-38-2 2210 126-30-7 1325 126-75-0 3018 126-98-7 3079 126-99-8 1991 127-18-4 1897 1300-73-8 1711 1302-42-7 2812 1303-28-2 1559 1304-28-5 1884 1304-29-6 1449 1305-62-0 1759 1305-78-8 1910 1306-19-0 2570 1309-64-4 1549 131-17-9 2810 131-52-2 2567 1310-58-3 1813 1310-65-2 2680 1310-66-3 2680 1310-73-2 1823 1312-73-8 1382 1313-99-1 2811 1314-34-7 3285 1314-56-3 1807 1314-62-1 2862 1314-84-7 1714 1317-36-8 3288 13171-21-6 3018 1319-77-3 2076, 1321-65-9 2811 1327-53-3 1561 133-06-2 2773 1333-74-0 1049 1333-82-0 1463 1333-86-4 1361 1336-21-6 2672 1338-02-9 3009 13463-39-3 1259 13463-40-6 1994 13477-00-4 1445 13494-80-9 2811 137-26-8 2771 137-30-4 2771 137-32-6 1105 13768-86-0 2928 13952-84-6 1992 140-88-5 1917 141-32-2 2348 141-43-5 2491 141-66-2 3018 141-78-6 1173 141-79-7 1229 142-04-1 1548 142-28-9 1992 142-62-1 2829 142-82-5 1206 142-96-1 1149 143-10-2 1228 143-33-9 1689 144-49-0 2642 144-62-7 2923 14484-64-1 2771 14977-61-8 1758 151-50-8 1680 151-56-4 1185 151-67-7 1610 1563-66-2 2757 1569-69-3 3054 15950-66-0 2020 15972-60-8 2588 1600-27-7 1629 16219-75-3 1993 1634-04-4 2398 16752-77-5 2757 16871-90-2 2655 16872-11-0 1775 1689-83-4 2588 16893-85-9 2674 16961-83-4 1778 17702-41-9 1868 17804-35-2 2757 1836-75-5 2779 1897-45-6 2588 1910-42-5 2781 1912-24-9 2763 1912-26-1 2763 1918-00-9 2769 19287-45-7 1911 19624-22-7 1380 19750-95-9 2588 2032-59-9 2757 2050-92-2 2841 20816-12-0 2471 20859-73-8 1397 2104-64-5 2783 21908-53-2 1641 22224-92-6 2783 2243-62-1 None 2303-17-5 2757 2310-17-0 2783 2425-06-1 2773 25013-15-4 2618 25057-89-0 2588 25154-52-3 3082 25154-54-5 1597 2524-04-1 2751 25321-14-6 2038 25340-17-4 2049 2551-62-4 1080 26628-22-8 1687 2807-30-9 1993 2813-95-8 3014 28434-00-6 2902 2855-13-2 2289 287-92-3 1146 2893-78-9 2465 2921-88-2 2783 298-00-0 2783 299-86-5 2783 300-76-5 3018 301-04-2 1616 302-01-2 2029 302-17-0 2811 30525-89-4 2213 309-00-2 2761 3173-53-3 2488 3209-22-1 1578 3268-49-3 2785 333-41-5 3018 3333-67-3 2811 3383-96-8 2783 3452-97-9 n.o.s 353-50-4 2417 353-59-3 1974 35400-43-2 3018 3689-24-5 1704 3710-30-3 2309 3724-65-0 2823 3811-04-9 1485 39300-45-3 3013 3982-91-0 1837 4098-71-9 2290 4109-96-0 2189 4170-30-3 1143 420-04-2 3276 431-03-8 2346 479-45-8 0208 485-31-4 2779 50-00-0 1198 50-29-3 2761 50-78-2 2811 504-29-0 2671 506-68-3 1889 506-77-4 1589 506-78-5 1588 506-93-4 1467 506-96-7 1716 507-09-5 2436 51-28-5 1320 513-36-0 1127 51630-58-1 2902 52-51-7 3241 52-68-6 2783 52315-07-8 2902 52645-53-1 2902 528-29-0 1597 52918-63-5 2588 532-27-4 1697 5329-14-6 2967 533-74-4 2588 54-11-5 1654 540-59-0 1150 540-84-1 1262 541-41-3 1182 542-88-1 2249 542-92-7 1993 55-38-9 3018 55-63-0 0143 55-68-5 1895 554-00-7 1590 554-13-2 2811 556-52-5 2810 558-13-4 2516 56-23-5 1846 56-35-9 3020 56-38-2 3018 56-72-4 3027 563-12-2 3018 563-80-4 2397 57-06-7 1545 57-14-7 1163 57-24-9 1692 57-57-8 2810 57-74-9 2996 58-08-2 1544 58-89-9 2761 583-59-5 2617 583-78-8 2020 584-02-1 1105 584-79-2 2902 584-84-9 2078 59-50-7 2669 590-01-2 1914 590-18-1 1012 591-35-5 2020 591-78-6 1224 592-01-8 1575 592-27-8 1262 592-41-6 2370 592-57-4 1993 592-84-7 1128 594-42-3 1670 60-24-2 2966 60-29-7 1155 60-34-4 1244 60-35-5 2811 60-41-3 1692 60-51-5 2783 60-57-1 2761 602-01-7 2038 606-20-2 2038 608-27-5 1590 608-31-1 1590 608-73-1 2761 61-82-5 2588 610-39-9 2038 610-40-2 1577 611-06-3 1578 611-15-4 2618 6164-98-3 2588 61789-51-3 2001 61790-14-5 2810 62-38-4 1674 62-53-3 1547 62-55-5 2811 62-56-6 2810 62-73-7 3018 62-74-8 2629 62-75-9 2810 622-45-7 2243 622-97-9 2618 624-64-6 1012 624-83-9 2480 626-38-0 1104 626-93-7 2282 628-63-7 1104 63-25-2 2757 630-08-0 1016 63989-69-5 1607 64-17-5 1170 64-18-6 1779 64-19-7 2789 64-67-5 1594 640-15-3 3018 6484-52-2 1942 65-30-5 1658 65-31-6 1659 66-81-9 2588 67-56-1 1230 67-63-0 1219 67-64-1 1090 67-66-3 1888 674-82-8 2521 68-12-2 2265 68085-85-8 2902 681-84-5 2606 6834-92-0 1759 68694-11-1 2588 6923-22-4 2783 70-30-4 2875 71-23-8 1274 71-36-3 1120 71-41-0 1105 71-43-2 1114 71-55-6 2831 732-11-6 2783 74-82-8 1971 74-83-9 1062 74-84-0 1035 74-85-1 1962 74-86-2 1001 74-87-3 1063 74-89-5 1061 74-90-8 1051 74-93-1 1064 74-95-3 2664 74-97-5 1887 74-98-6 1978 74-99-7 1954 7439-90-9 1970 7439-92-1 3288 7439-93-2 1415 7439-95-4 1418 7439-95-4 1869 7439-97-6 2809 7440-01-9 1065 7440-16-6 3089 7440-28-0 1707 7440-29-1 2975 7440-36-0 2871 7440-37-1 1951 7440-38-2 1558 7440-39-3 1400 7440-41-7 1567 7440-43-9 2570 7440-44-0 1361 7440-48-4 3178 7440-58-6 2545 7440-59-7 1963 7440-63-3 2036 7440-66-6 1436 7440-70-2 1401 7446-08-4 2811 7446-09-5 1079 7446-18-6 1707 7487-94-7 1624 75-00-3 1037 75-01-4 1086 75-02-5 1860 75-04-7 1036 75-05-8 1648 75-07-0 1089 75-08-1 2363 75-09-2 1593 75-15-0 1131 75-18-3 1164 75-20-7 1402 75-21-8 1040 75-25-2 2515 75-31-0 1221 75-34-3 2362 75-35-4 1303 75-36-5 1717 75-38-7 1959 75-43-4 1029 75-44-5 1076 75-45-6 1018 75-46-7 1984 75-50-3 1083 75-52-5 1261 75-54-7 1242 75-55-8 1921 75-56-9 1280 75-63-8 1009 75-65-0 1120 75-69-4 1017 75-71-8 1028 75-72-9 1022 75-73-0 1982 75-74-1 1649 75-77-4 1298 75-79-6 1250 75-86-5 1541 75-91-2 3109 75-98-9 1759 7550-45-0 1838 7580-67-8 1414 759-94-4 2992 76-15-3 1020 76-44-8 2761 7601-89-0 1502 7616-94-6 3083 7631-99-4 1498 7632-00-0 1500 7637-07-2 1008 7646-78-8 1827 7647-01-0 1050 7664-38-2 1805 7664-39-3 1052 7664-41-7 1005 7664-93-9 1830 768-52-5 2810 7681-52-9 1791 7697-37-2 2031 77-58-7 2788 77-78-1 1595 7704-34-9 1350 7719-12-2 1809 7722-84-1 2015 7723-14-0 1381 7726-95-6 1744 7727-37-9 1066 7727-37-9 1977 7727-43-7 1564 7757-79-1 1486 7758-01-2 1484 7758-09-0 1488 7761-88-8 1493 7772-99-8 2923 7775-09-9 1495 7778-43-0 1685 7778-44-1 1573 7778-74-7 1489 7779-88-6 1514 7782-41-4 1045 7782-44-7 1072 7782-44-7 1073 7782-49-2 2658 7782-50-5 1017 7782-65-2 2192 7783-00-8 2811 7783-06-4 1053 7783-07-5 2202 7783-35-9 1645 7783-41-7 2190 7783-47-3 3288 7783-54-2 2451 7783-61-1 1859 7783-70-2 1732 7783-79-1 2194 7783-81-5 2978 7784-34-1 1560 7784-40-9 1617 7784-41-0 1677 7784-42-1 2188 7784-44-3 1546 7786-34-7 3018 7789-06-2 2811 7789-30-2 1745 7789-38-0 1494 7790-91-2 1749 7791-23-3 2879 7791-25-5 1834 78-00-2 1649 78-10-4 1292 78-30-8 2574 78-34-2 3018 78-78-4 1265 78-79-5 1218 78-81-9 1214 78-83-1 1212 78-84-2 2045 78-85-3 2396 78-87-5 1279 78-90-0 2258 78-92-2 1120 78-93-3 1193 78-95-5 1695 78-96-6 2735 78-99-9 1992 7803-51-2 2199 7803-52-3 2676 7803-62-5 2203 786-19-6 3018 79-00-5 3082 79-01-6 1710 79-04-9 1752 79-06-1 2074 79-07-2 2811 79-09-4 1848 79-10-7 2218 79-11-8 1751 79-20-9 1231 79-22-1 1238 79-24-3 2842 79-27-6 2504 79-34-5 1702 79-36-7 1765 79-41-4 2531 79-43-6 1764 79-46-9 2608 80-15-9 3109 80-62-6 1247 8001-35-2 2761 8008-20-6 1223 8018-01-7 2771 8052-41-3 1268 81-81-2 3027 811-97-2 3159 818-08-6 3146 822-06-0 2281 83-79-4 2588 836-30-6 1325 84-74-2 3082 85-44-9 2214 86-50-0 2783 86-88-4 1651 867-27-6 3018 87-61-6 2811 87-66-1 2811 87-68-3 2279 87-86-5 3155 873-66-5 2618 88-06-2 2020 88-72-2 1664 88-73-3 1578 88-74-4 1661 88-85-7 2779 88-89-1 0154 89-98-5 1760 90-04-0 2431 90-30-2 2811 90035-08-8 3027 91-22-5 2656 91-64-5 2811 91465-08-6 2588 919-86-8 3018 92-67-1 2811 92-87-5 1885 93-58-3 2938 93-65-2 2765 93-76-5 2765 933-75-5 2020 933-78-8 2020 935-95-5 2020 94-36-0 3102 94-74-6 2765 94-75-7 2765 944-22-9 3018 95-47-6 1307 95-48-7 2076 95-50-1 1591 95-51-2 2019 95-53-4 1708 95-55-6 2512 95-57-8 2021 95-76-1 1590 95-80-7 1709 95-82-9 1590 95-95-4 2020 95266-40-3 2588 957-51-7 2588 96-12-8 2872 96-14-0 1208 96-18-4 2810 96-22-0 1156 96-33-3 1919 97-00-7 1577 97-02-9 1596 97-63-2 2277 98-00-0 2874 98-01-1 1199 98-05-5 1557 98-07-7 2226 98-82-8 1918 98-83-9 2303 98-87-3 1886 98-88-4 1736 98-95-3 1662 99-09-2 1661 99-54-7 1578 99-65-0 1597 99-87-6 2046 99-97-8 2810 99-99-0 1664 999-61-1 1760 --- NEW FILE: Makefile.am --- bin_SCRIPTS = \ add_nsc_number.pl \ add_numbers.pl \ add_smiles.pl \ add_un_nummers.pl \ check_cas_nummers.pl \ check_doublenumbers.pl \ zoek-UNs-zonder-wml.pl CLEANFILES = $(bin_SCRIPTS) --- NEW FILE: add_nsc_number.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $CAS_NSC_table_file = "CAS2NSC.txt"; my @CAS_NSC_table; my $nr_NSC_numbers = 0; my $nr_added_NSC_numbers = 0; my $nr_unable = 0; # Read CAS<=>NSC table if (open (FILE, $CAS_NSC_table_file)) { while (<FILE>) { chomp; push (@CAS_NSC_table, $_); } close (FILE) || die "$CAS_NSC_table_file: $!$/"; } else { die "$CAS_NSC_table_file: $!$/"; } # Loop over all files foreach my $arg (@ARGV) { &add_NSC_number_to_file ($arg); } # Print statistics print " ------------------$/"; if ($nr_NSC_numbers) { print "NSC numbers found : $nr_NSC_numbers$/"; } else { print "No NSC numbers found.$/"; } if ($nr_unable) { print "Unable to changes: $nr_unable$/" if ($nr_unable); } if ($nr_added_NSC_numbers) { print "NSC numbers added : $nr_added_NSC_numbers$/"; print "NSC numbers total : ",$nr_NSC_numbers+$nr_added_NSC_numbers,$/; } else { print "No NSC numbers added.$/"; } sub add_NSC_number_to_file { my $file = shift; my @XML_data; my $changed = ""; print "Processing $file. "; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; while (<FILE>) { if (/CHEMICAL/i) { push (@XML_data, $_); $chemical = "True"; } elsif (/CAS-NUMBER/i && /\>(.+?)\</) { $cas_found = "True"; my $CAS_line = $_; my $NSC_line = $CAS_line; my $CAS_number = $1; push (@XML_data, $CAS_line); my $next_line = <FILE> if (!eof(FILE)); if ($next_line =~ /NSC-NUMBER/) { push (@XML_data, $next_line); $nr_NSC_numbers++; } else { my $NSC_number; if ($NSC_number = &get_NSC_number($CAS_number)) { $NSC_line =~ s/CAS-NUMBER/NSC-NUMBER/; $NSC_line =~ s/\>.*?\</\>$NSC_number\</; push (@XML_data, $NSC_line); $changed = "True"; } push (@XML_data, $next_line); } } elsif (/WOC-NUMBER/ && /\>(.+?)\</) { push (@XML_data, $_); $cas_found = "True"; } else { push (@XML_data, $_); } } print "No CAS number found!! " if ($chemical && !$cas_found); close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "NSC number added...$/"; $nr_added_NSC_numbers++; } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } sub get_NSC_number { my $CAS_number = shift; my $NSC_number = ""; foreach my $line (@CAS_NSC_table) { if ($line =~ /^$CAS_number\s*(.*)/) { $NSC_number = $1; } } return $NSC_number; } --- NEW FILE: add_numbers.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; my $number_type = "SMILES"; # Check command line options if (@ARGV < 1) { print "Usage: $0 [-type <TYPE>] <xml-files>$/"; print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])$/"; exit; } elsif ((@ARGV >= 3) && ($ARGV[0] =~ /^-t/i)) { shift @ARGV; $number_type = shift @ARGV; } # Global variables my $CAS_Lookup_table_file = "CAS2SMILES.txt"; my @CAS_Lookup_table; my $nr_numbers = 0; my $nr_added_numbers = 0; my $nr_unable = 0; my $nr_double = 0; if ($number_type =~ /^SMILES/i) { $number_type = "SMILES"; $CAS_Lookup_table_file = "CAS2SMILES.txt"; } elsif ($number_type =~ /^NSC/i) { $number_type = "NSC-NUMBER"; $CAS_Lookup_table_file = "CAS2NSC.txt"; } elsif ($number_type =~ /^UN/i) { $number_type = "UN-NUMBER"; $CAS_Lookup_table_file = "CAS2UN.txt"; } else { print "Error: unrecognized type $number_type!$/"; print "Usage: $0 [-type <TYPE>] <xml-files>$/"; print "\t<TYPE> = (SMILES | UN[-NUMBER] | NSC[-NUMBER])$/"; exit; } # Read CAS<=><TYPE> table if (open (FILE, $CAS_Lookup_table_file)) { while (<FILE>) { chomp; push (@CAS_Lookup_table, $_); } close (FILE) || die "$CAS_Lookup_table_file: $!$/"; } else { die "$CAS_Lookup_table_file: $!$/"; } # Loop over all files foreach my $arg (@ARGV) { &add_number_to_file ($arg); } # Print statistics print " ------------------$/"; if ($nr_numbers) { print "$number_type strings found : $nr_numbers$/"; } else { print "No $number_type strings found.$/"; } if ($nr_unable) { print "Unable to changes: $nr_unable$/" if ($nr_unable); } if ($nr_double) { print "Double $number_type strings: $nr_double$/" if ($nr_double); } if ($nr_added_numbers) { print "$number_type strings added : $nr_added_numbers$/"; print "$number_type strings total : ",$nr_numbers+$nr_added_numbers,$/; } else { print "No $number_type strings added.$/"; } sub add_number_to_file { my $file = shift; my @XML_data; my $changed = ""; my $number_found = ""; my $double_number = ""; my @numbers = (); print "Processing $file. "; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; while (<FILE>) { if (/CHEMICAL/i) { push (@XML_data, $_); $chemical = "True"; } elsif (/CAS-NUMBER/i && /\>(.+?)\</) { $cas_found = "True"; if (!$number_found) { my $CAS_line = $_; my $number_line = $CAS_line; my $CAS_number = $1; push (@XML_data, $CAS_line); my $next_line = <FILE> if (!eof(FILE)); if ($next_line =~ /$number_type/) { push (@XML_data, $next_line); $number_found = "True"; } else { my $number; if ($number = &get_number($CAS_number)) { $number_line =~ s/CAS-NUMBER/$number_type/; $number_line =~ s/\>.*?\</\>$number\</; push (@numbers, $number); push (@XML_data, $number_line); $changed = "True"; } push (@XML_data, $next_line); } } } elsif (/$number_type/ && /\>(.+?)\</) { push (@numbers, $1); if (!$number_found) { push (@XML_data, $_); $number_found = "True"; } else { $double_number = "True"; } } elsif (/WOC-NUMBER/ && /\>(.+?)\</) { push (@XML_data, $_); $cas_found = "True"; } else { push (@XML_data, $_); } } print "No CAS number found!! " if ($chemical && !$cas_found); close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($double_number) { print "Double $number_type string found!"; $nr_numbers++; $nr_double++; $changed = "True"; } elsif ($number_found) { print "No changes needed...$/"; $nr_numbers++; $changed = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; if ($double_number) { print " Attempting to remove...$/"; } else { print "$number_type number added...$/"; $nr_added_numbers++; } } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } sub get_number { my $CAS_number = shift; my $number = ""; foreach my $line (@CAS_Lookup_table) { if ($line =~ /^$CAS_number\s*(.*)/) { $number = $1; } } return $number; } --- NEW FILE: add_smiles.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $CAS_SMILES_table_file = "CAS2SMILES.txt"; my @CAS_SMILES_table; my $nr_SMILES_numbers = 0; my $nr_added_SMILES_numbers = 0; my $nr_unable = 0; my $nr_double = 0; # Read CAS<=>SMILES table if (open (FILE, $CAS_SMILES_table_file)) { while (<FILE>) { chomp; push (@CAS_SMILES_table, $_); } close (FILE) || die "$CAS_SMILES_table_file: $!$/"; } else { die "$CAS_SMILES_table_file: $!$/"; } # Loop over all files foreach my $arg (@ARGV) { &add_SMILES_number_to_file ($arg); } # Print statistics print " ------------------$/"; if ($nr_SMILES_numbers) { print "SMILES strings found : $nr_SMILES_numbers$/"; } else { print "No SMILES strings found.$/"; } if ($nr_unable) { print "Unable to changes: $nr_unable$/" if ($nr_unable); } if ($nr_double) { print "Double SMILES strings: $nr_double$/" if ($nr_double); } if ($nr_added_SMILES_numbers) { print "SMILES strings added : $nr_added_SMILES_numbers$/"; print "SMILES strings total : ",$nr_SMILES_numbers+$nr_added_SMILES_numbers,$/; } else { print "No SMILES strings added.$/"; } sub add_SMILES_number_to_file { my $file = shift; my @XML_data; my $changed = ""; my $smiles_found = ""; my $double_smiles = ""; my @smiles = (); print "Processing $file. "; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; while (<FILE>) { if (/CHEMICAL/i) { push (@XML_data, $_); $chemical = "True"; } elsif (/CAS-NUMBER/i && /\>(.+?)\</) { $cas_found = "True"; if (!$smiles_found) { my $CAS_line = $_; my $SMILES_line = $CAS_line; my $CAS_number = $1; push (@XML_data, $CAS_line); my $next_line = <FILE> if (!eof(FILE)); if ($next_line =~ /SMILES/) { push (@XML_data, $next_line); $smiles_found = "True"; } else { my $SMILES_number; if ($SMILES_number = &get_SMILES_number($CAS_number)) { $SMILES_line =~ s/CAS-NUMBER/SMILES/; $SMILES_line =~ s/\>.*?\</\>$SMILES_number\</; push (@XML_data, $SMILES_line); $changed = "True"; } push (@XML_data, $next_line); } } } elsif (/SMILES/ && /\>(.+?)\</) { if (!$smiles_found) { push (@XML_data, $_); $smiles_found = "True"; } else { $double_smiles = "True"; } } elsif (/WOC-NUMBER/ && /\>(.+?)\</) { push (@XML_data, $_); $cas_found = "True"; } else { push (@XML_data, $_); } } print "No CAS number found!! " if ($chemical && !$cas_found); close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($double_smiles) { print "Double SMILES string found!"; $nr_SMILES_numbers++; $nr_double++; $changed = "True"; } elsif ($smiles_found) { print "No changes needed...$/"; $nr_SMILES_numbers++; $changed = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; if ($double_smiles) { print " Attempting to remove...$/"; } else { print "SMILES number added...$/"; $nr_added_SMILES_numbers++; } } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } sub get_SMILES_number { my $CAS_number = shift; my $SMILES_number = ""; foreach my $line (@CAS_SMILES_table) { if ($line =~ /^$CAS_number\s*(.*)/) { $SMILES_number = $1; } } return $SMILES_number; } --- NEW FILE: add_un_nummers.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $CAS_UN_table_file = "CAS2UN.txt"; my @CAS_UN_table; my $nr_UN_numbers = 0; my $nr_added_UN_numbers = 0; my $nr_unable = 0; # Read CAS<=>UN table if (open (FILE, $CAS_UN_table_file)) { while (<FILE>) { chomp; push (@CAS_UN_table, $_); } close (FILE) || die "$CAS_UN_table_file: $!$/"; } else { die "$CAS_UN_table_file: $!$/"; } # Loop over all files foreach my $arg (@ARGV) { &add_UN_number_to_file ($arg); } # Print statistics print " ------------------$/"; if ($nr_UN_numbers) { print "UN numbers found : $nr_UN_numbers$/"; } else { print "No UN numbers found.$/"; } if ($nr_unable) { print "Unable to changes: $nr_unable$/" if ($nr_unable); } if ($nr_added_UN_numbers) { print "UN numbers added : $nr_added_UN_numbers$/"; print "UN numbers total : ",$nr_UN_numbers+$nr_added_UN_numbers,$/; } else { print "No UN numbers added.$/"; } sub add_UN_number_to_file { my $file = shift; my @XML_data; my $changed = ""; print "Processing $file. "; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; while (<FILE>) { if (/CHEMICAL/i) { push (@XML_data, $_); $chemical = "True"; } elsif (/CAS-NUMBER/i && /\>(.+?)\</) { $cas_found = "True"; my $CAS_line = $_; my $UN_line = $CAS_line; my $CAS_number = $1; push (@XML_data, $CAS_line); my $next_line = <FILE> if (!eof(FILE)); if ($next_line =~ /UN-NUMBER/) { push (@XML_data, $next_line); $nr_UN_numbers++; } else { my $UN_number; if ($UN_number = &get_UN_number($CAS_number)) { $UN_line =~ s/CAS-NUMBER/UN-NUMBER/; $UN_line =~ s/\>.*?\</\>$UN_number\</; push (@XML_data, $UN_line); $changed = "True"; } push (@XML_data, $next_line); } } elsif (/WOC-NUMBER/ && /\>(.+?)\</) { push (@XML_data, $_); $cas_found = "True"; } else { push (@XML_data, $_); } } print "No CAS number found!! " if ($chemical && !$cas_found); close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "UN number added...$/"; $nr_added_UN_numbers++; } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } sub get_UN_number { my $CAS_number = shift; my $UN_number = ""; foreach my $line (@CAS_UN_table) { if ($line =~ /^$CAS_number\s*\<\=\>\s*(.*)/) { $UN_number = $1; } } return $UN_number; } --- NEW FILE: check_cas_nummers.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # Check argument line if (@ARGV == 0) { print "Usage: $0 <XML-files>$/"; exit; } # Global variables my $nr_ignored = 0; my $nr_inserted = 0; my $nr_unable = 0; # Loop over all files foreach my $arg (@ARGV) { &check_CAS_and_WOC_numbers_in_file ($arg); } # Print statistics print " ------------------------$/"; if ($nr_ignored + $nr_inserted == 0) { print "All files passed check.$/"; } else { print "Errors ignored : $nr_ignored$/" if ($nr_ignored); print "Unable to changes : $nr_unable$/" if ($nr_unable); print "Check digits inserted: $nr_inserted$/" if ($nr_inserted); } sub check_CAS_and_WOC_numbers_in_file { my $file = shift; my @XML_data; my $changed = ""; print "Processing $file, "; if (open (FILE, $file)) { while (<FILE>) { if (/(CAS|WOC)-NUMBER.*?\>(.+?)\</) { my $type = $1; my $line = $_; my $number = $2; my $check = &check_digit ($number); if (!$check) { print "$/ Wrong check digit or typing error in $type number in $file! Ignoring error...$/ "; $nr_ignored++; #chop ($number); #chop ($number); #$number = &check_digit ($number); #$line =~ s/\>.*?\</\>$number\</; #print "Corrected...$/"; #$nr_corrected++; #$changed = "True"; } elsif ($check =~ /Error/i) { print "$/ $check in $file! Ignoring error...$/ "; $nr_ignored++; } elsif ($check !~ /Correct/i) { print "$/ Missing check digit of $type number in $file!$/ "; $number = &check_digit ($number); $line =~ s/\>.*?\</\>$number\</; $changed = "True"; } push (@XML_data, $line); } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die "$file: $!$/"; print "Check digit inserted...$/"; $nr_inserted++; } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } --- NEW FILE: check_doublenumbers.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my @NUMBER_count = (); my @NUMBER_doublecount = (); # Loop over all files foreach my $arg (@ARGV) { &check_numbers_in_file ($arg); } # Print statistics print " ------------------$/"; if ($nr_SMILES_numbers) { print "SMILES strings found : $nr_SMILES_numbers$/"; } else { print "No SMILES strings found.$/"; } if ($nr_unable) { print "Unable to changes: $nr_unable$/" if ($nr_unable); } if ($nr_double) { print "Double SMILES strings: $nr_double$/" if ($nr_double); } if ($nr_added_SMILES_numbers) { print "SMILES strings added : $nr_added_SMILES_numbers$/"; print "SMILES strings total : ",$nr_SMILES_numbers+$nr_added_SMILES_numbers,$/; } else { print "No SMILES strings added.$/"; } sub check_numbers_in_file { my $file = shift; my @XML_data; my $changed = ""; my $doubles_found = ""; my $mismatching_doubles_found = ""; my @NUMBERS = (); print "Processing $file. "; if (open (FILE, $file)) { my $cas_found = ""; my $chemical = ""; while (<FILE>) { if (/CHEMICAL/i) { push (@XML_data, $_); $chemical = "True"; } elsif (/INDEX CLASS/i && /\>(.+?)\</) { $number_found = "True"; my $CAS_line = $_; my $SMILES_line = $CAS_line; my $CAS_number = $1; push (@XML_data, $CAS_line); my $next_line = <FILE> if (!eof(FILE)); if ($next_line =~ /SMILES/) { push (@XML_data, $next_line); $smiles_found = "True"; } else { my $SMILES_number; if ($SMILES_number = &get_SMILES_number($CAS_number)) { $SMILES_line =~ s/CAS-NUMBER/SMILES/; $SMILES_line =~ s/\>.*?\</\>$SMILES_number\</; push (@XML_data, $SMILES_line); $changed = "True"; } push (@XML_data, $next_line); } } } elsif (/SMILES/ && /\>(.+?)\</) { if (!$smiles_found) { push (@XML_data, $_); $smiles_found = "True"; } else { $double_smiles = "True"; } } elsif (/WOC-NUMBER/ && /\>(.+?)\</) { push (@XML_data, $_); $cas_found = "True"; } else { push (@XML_data, $_); } } print "No CAS number found!! " if ($chemical && !$cas_found); close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; } if ($double_smiles) { print "Double SMILES string found...$/"; $nr_SMILES_numbers++; $nr_double++; } elsif ($changed && $smiles_found) { print "No changes needed...$/"; $nr_SMILES_numbers++; } elsif ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "SMILES number added...$/"; $nr_added_SMILES_numbers++; } else { warn "$file: $!$/"; print "Unable to change...$/"; $nr_unable++; } } else { print "No changes...$/"; } } sub get_SMILES_number { my $CAS_number = shift; my $SMILES_number = ""; foreach my $line (@CAS_SMILES_table) { if ($line =~ /^$CAS_number\s*(.*)/) { $SMILES_number = $1; } } return $SMILES_number; } --- NEW FILE: zoek-UNs-zonder-wml.pl.in --- #! @PATHTOPERL@ -w use strict; use diagnostics; my $debug = ""; my $wmldir = "../../web-docs/data/wml"; my $existsfile = "tmp-nowml-exists"; # Check command line options #if (@ARGV == 0) { # print "Usage: $0 <xml-files>$/"; # exit; #} # Global variables my $CAS_UN_table_file = "CAS-UN.table"; my @CAS_UN_table; my $nr_UN_numbers = 0; my $nr_added_UN_numbers = 0; my $nr_unable = 0; # Read CAS<=>UN table if (open (FILE, $CAS_UN_table_file)) { <FILE>; # skip first line (comment) # while (<FILE>) { chomp; push (@CAS_UN_table, $_); } close (FILE) || die "$CAS_UN_table_file: $!$/"; } else { die "$CAS_UN_table_file: $!$/"; } &create_existskeyfile ("NUMBER"); foreach my $line (@CAS_UN_table) { my ($cas) = split (/\s*<=>\s*/, $line); my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { print "NUMBER found:\t$nummatch$/" if ($debug); } else { print "Dangling CAS/UN pair: $line$/"; } } &remove_existskeyfile ("NUMBER"); sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } |