Update of /cvsroot/woc/woc/src/woc/src/check
In directory usw-pr-cvs1:/tmp/cvs-serv23171/woc/src/check

Added Files:
    Makefile.am
    check_filenames.pl
    check_filenames.pl.in
    list-cas.pl
    list-cas.pl.in
    test-wmls-op-wellformed.pl
    test-wmls-op-wellformed.pl.in
    voeg-fysische-gegevens-toe.pl
    voeg-fysische-gegevens-toe.pl.in
    voeg-gif-mime-toe.pl
    voeg-gif-mime-toe.pl.in
    voeg-pdb-mime-toe.pl
    voeg-pdb-mime-toe.pl.in
    voeg-woc-nummer-toe.pl
    voeg-woc-nummer-toe.pl.in
    voeg_stylesheet_toe.pl
    voeg_stylesheet_toe.pl.in
    zet-fysisch-props-op-extensie-xml.pl
    zet-fysisch-props-op-extensie-xml.pl.in
    zet-wmls-op-ISO-8859-1-encoding.pl
    zet-wmls-op-ISO-8859-1-encoding.pl.in
    zoek-cas-bij-pdbs-voor-dadml.pl
    zoek-cas-bij-pdbs-voor-dadml.pl.in
    zoek-chems-zonder-cas.pl
    zoek-chems-zonder-cas.pl.in
    zoek-chems-zonder-fysische-props.pl
    zoek-chems-zonder-fysische-props.pl.in
    zoek-pdbs-zonder-wml.pl
    zoek-pdbs-zonder-wml.pl.in
    zoek-wmls-bij-groepen.pl
    zoek-wmls-bij-groepen.pl.in
    zoek-wmls-zonder-giflink.pl
    zoek-wmls-zonder-giflink.pl.in
    zoek-xmls-met-cas-zonder-2dcml.pl
    zoek-xmls-met-cas-zonder-2dcml.pl.in

Log Message:

--- NEW FILE: Makefile.am ---
bindir=${prefix}/bin/check

bin_SCRIPTS = \
    check_filenames.pl \
    list-cas.pl \
    test-wmls-op-wellformed.pl \
    voeg-fysische-gegevens-toe.pl \
    voeg-gif-mime-toe.pl \
    voeg-pdb-mime-toe.pl \
    voeg-woc-nummer-toe.pl \
    voeg_stylesheet_toe.pl \
    zet-fysisch-props-op-extensie-xml.pl \
    zet-wmls-op-ISO-8859-1-encoding.pl \
    zoek-cas-bij-pdbs-voor-dadml.pl \
    zoek-chems-zonder-cas.pl \
    zoek-chems-zonder-fysische-props.pl \
    zoek-pdbs-zonder-wml.pl \
    zoek-wmls-bij-groepen.pl \
    zoek-wmls-zonder-giflink.pl \
    zoek-xmls-met-cas-zonder-2dcml.pl

CLEANFILES = $(bin_SCRIPTS)

--- NEW FILE: check_filenames.pl ---
#!
/usr/bin/perl -w use strict; if (scalar(@ARGV) == 0) { print "syntax: check_filenames.pl <*.wml>$/"; exit 0; } my @wmlfiles = @ARGV; my %accepted_codes = (); my $filecount = 0; my $filesparsed = 0; my $codesadded = 0; my $hasid = 0; my $wrongid = 0; foreach my $file (@wmlfiles) { $filecount++; my @content = (); my $changed = 0; my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking $file ($correctcode)...$/"; @content = <OPENFILE>; for (my $i=0; $i < scalar(@content); $i++) { my $line = $content[$i]; if ($line =~ /<ITEM(.*?)>/i) { $filesparsed++; my $args = $1; if ($args =~ /CODE=\"(.*?)\"/) { $hasid++; my $code = $1; $code =~ s/\n//g; print " Code: $code$/"; $accepted_codes{"$code"} = "yes"; if ($code ne $correctcode) { $wrongid++; print " -> it should be: $correctcode$/"; print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $content[$i] = $newline; $changed = 1; } } else { # should correct this file print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $accepted_codes{"$correctcode"} = "yes"; $content[$i] = $newline; $changed = 1; } } } close(OPENFILE); } if ($changed) { if (open(OPENFILE, ">$file")) { $codesadded++; print OPENFILE @content; } else { print "ERROR: file $file could not be saved!"; } } } # now come the process of checking references my $wrong_group = ""; foreach my $file (@wmlfiles) { my @content = (); my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking references in $file ($correctcode)...$/"; @content = <OPENFILE>; foreach my $line (@content) { if ($line =~ /<GROUP>(.*)<\/GROUP>/i) { my $group = $1; print " checking GROUP $group...$/"; if (!($accepted_codes{"$group"})) { print " ERROR: unknown GROUP $group$/"; } } } } } print "----------------------------------------$/"; print " files : $filecount$/"; print " checked : $filesparsed$/"; print " code 
: $hasid$/"; print " corrected: $wrongid$/"; print " added : $codesadded$/"; print "----------------------------------------$/"; --- NEW FILE: check_filenames.pl.in --- #! @PATHTOPERL@ -w use strict; if (scalar(@ARGV) == 0) { print "syntax: check_filenames.pl <*.wml>$/"; exit 0; } my @wmlfiles = @ARGV; my %accepted_codes = (); my $filecount = 0; my $filesparsed = 0; my $codesadded = 0; my $hasid = 0; my $wrongid = 0; foreach my $file (@wmlfiles) { $filecount++; my @content = (); my $changed = 0; my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking $file ($correctcode)...$/"; @content = <OPENFILE>; for (my $i=0; $i < scalar(@content); $i++) { my $line = $content[$i]; if ($line =~ /<ITEM(.*?)>/i) { $filesparsed++; my $args = $1; if ($args =~ /CODE=\"(.*?)\"/) { $hasid++; my $code = $1; $code =~ s/\n//g; print " Code: $code$/"; $accepted_codes{"$code"} = "yes"; if ($code ne $correctcode) { $wrongid++; print " -> it should be: $correctcode$/"; print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $content[$i] = $newline; $changed = 1; } } else { # should correct this file print " Old: $line"; my $newline = "<ITEM$args CODE=\"$correctcode\">$/"; print " New: $newline"; $accepted_codes{"$correctcode"} = "yes"; $content[$i] = $newline; $changed = 1; } } } close(OPENFILE); } if ($changed) { if (open(OPENFILE, ">$file")) { $codesadded++; print OPENFILE @content; } else { print "ERROR: file $file could not be saved!"; } } } # now come the process of checking references my $wrong_group = ""; foreach my $file (@wmlfiles) { my @content = (); my $correctcode = $file; $correctcode =~ s/.*\/(.*).xml/$1/; if (open (OPENFILE, "<$file")) { print "Checking references in $file ($correctcode)...$/"; @content = <OPENFILE>; foreach my $line (@content) { if ($line =~ /<GROUP>(.*)<\/GROUP>/i) { my $group = $1; print " checking GROUP $group...$/"; if (!($accepted_codes{"$group"})) { print " 
ERROR: unknown GROUP $group$/"; } } } } } print "----------------------------------------$/"; print " files : $filecount$/"; print " checked : $filesparsed$/"; print " code : $hasid$/"; print " corrected: $wrongid$/"; print " added : $codesadded$/"; print "----------------------------------------$/";
--- NEW FILE: list-cas.pl ---
#! /usr/bin/perl -w
#
# Make a list of all CAS numbers for which compounds are given.
#
# Every file in $wmldir whose name ends in ".xml" is scanned for lines that
# mention CAS-NUMBER (case-insensitive). Each such line is printed with its
# <INDEX ...> and </INDEX> tags removed, one entry per output line.
#
use strict;
use diagnostics;

my $wmldir = "../../data/wml";

opendir (my $dir, $wmldir) or die "$wmldir: $!$/";
# Anchor the extension so that backup names such as "foo.xml~" are skipped.
my @files = grep { /\.xml\z/i } readdir ($dir);
closedir ($dir) or die "$wmldir: $!$/";

foreach my $file (@files) {
    my $path = "$wmldir/$file";

    # Read the file directly instead of running grep(1) through the shell:
    # file names need no quoting and no process is started per file.
    my $fh;
    unless (open ($fh, '<', $path)) {
        warn "$path: $!$/";
        next;
    }

    while (my $line = <$fh>) {
        next unless $line =~ /CAS-NUMBER/i;

        # Strip the tags line by line. Stripping all matches of a file at
        # once would glue several CAS numbers together, because the
        # surrounding whitespace (including newlines) is removed as well.
        chomp ($line);
        $line =~ s/\s*<INDEX.*?>\s*//ig;
        $line =~ s/\s*<\/INDEX.*?>\s*//ig;
        print "$line$/";
    }

    close ($fh) or warn "$path: $!$/";
}

--- NEW FILE: list-cas.pl.in ---
#! @PATHTOPERL@ -w
#
# Make a list of all CAS numbers for which compounds are given.
#
# Every file in $wmldir whose name ends in ".xml" is scanned for lines that
# mention CAS-NUMBER (case-insensitive). Each such line is printed with its
# <INDEX ...> and </INDEX> tags removed, one entry per output line.
#
use strict;
use diagnostics;

my $wmldir = "../../data/wml";

opendir (my $dir, $wmldir) or die "$wmldir: $!$/";
# Anchor the extension so that backup names such as "foo.xml~" are skipped.
my @files = grep { /\.xml\z/i } readdir ($dir);
closedir ($dir) or die "$wmldir: $!$/";

foreach my $file (@files) {
    my $path = "$wmldir/$file";

    # Read the file directly instead of running grep(1) through the shell:
    # file names need no quoting and no process is started per file.
    my $fh;
    unless (open ($fh, '<', $path)) {
        warn "$path: $!$/";
        next;
    }

    while (my $line = <$fh>) {
        next unless $line =~ /CAS-NUMBER/i;

        # Strip the tags line by line. Stripping all matches of a file at
        # once would glue several CAS numbers together, because the
        # surrounding whitespace (including newlines) is removed as well.
        chomp ($line);
        $line =~ s/\s*<INDEX.*?>\s*//ig;
        $line =~ s/\s*<\/INDEX.*?>\s*//ig;
        print "$line$/";
    }

    close ($fh) or warn "$path: $!$/";
}

--- NEW FILE: test-wmls-op-wellformed.pl ---
#! /usr/bin/perl -w use strict; if (! @ARGV) { die "Usage: $0 <xml-files>$/"; } use XML::Parser; my $parser = new XML::Parser(ErrorContext => 2); my $nr_files = @ARGV; my $nr_notexist = 0; my $nr_processed = 0; my $nr_warnings = 0; my $nr_ignored = 0; my $nr_okay = 0; foreach my $file (@ARGV) { if (! (-e $file)) { warn "$file: does not exist!!!$/"; $nr_notexist++; } else { print "Processing $file... 
"; eval { $parser->parsefile("$file"); }; if ($@) { if ($@ =~ /<sub>/i) { print "Probably found a <SUB> tag inside an attribute, ignoring...$/"; $nr_ignored++; } elsif ($@ =~ /<i>/i) { print "Probably found a <I> tag inside an attribute, ignoring...$/"; $nr_ignored++; } else { warn "$/$@$/"; $nr_warnings++; } } else { print "Okay!$/"; $nr_okay++; } $nr_processed++; } } if ($nr_files) { print " -------------------------$/"; print "Files processed : $nr_files$/"; if ($nr_warnings || $nr_ignored) { print " checked okay : $nr_okay$/"; print " warnings : $nr_warnings$/" if $nr_warnings; print " ignored warnings : $nr_ignored$/" if $nr_ignored; } else { print " all files checked okay!$/"; } print "Non-existing files : $nr_notexist$/" if ($nr_notexist); } else { print "No files to process!$/"; } --- NEW FILE: test-wmls-op-wellformed.pl.in --- #! @PATHTOPERL@ -w use strict; if (! @ARGV) { die "Usage: $0 <xml-files>$/"; } use XML::Parser; my $parser = new XML::Parser(ErrorContext => 2); my $nr_files = @ARGV; my $nr_notexist = 0; my $nr_processed = 0; my $nr_warnings = 0; my $nr_ignored = 0; my $nr_okay = 0; foreach my $file (@ARGV) { if (! (-e $file)) { warn "$file: does not exist!!!$/"; $nr_notexist++; } else { print "Processing $file... 
"; eval { $parser->parsefile("$file"); }; if ($@) { if ($@ =~ /<sub>/i) { print "Probably found a <SUB> tag inside an attribute, ignoring...$/"; $nr_ignored++; } elsif ($@ =~ /<i>/i) { print "Probably found a <I> tag inside an attribute, ignoring...$/"; $nr_ignored++; } else { warn "$/$@$/"; $nr_warnings++; } } else { print "Okay!$/"; $nr_okay++; } $nr_processed++; } } if ($nr_files) { print " -------------------------$/"; print "Files processed : $nr_files$/"; if ($nr_warnings || $nr_ignored) { print " checked okay : $nr_okay$/"; print " warnings : $nr_warnings$/" if $nr_warnings; print " ignored warnings : $nr_ignored$/" if $nr_ignored; } else { print " all files checked okay!$/"; } print "Non-existing files : $nr_notexist$/" if ($nr_notexist); } else { print "No files to process!$/"; } --- NEW FILE: voeg-fysische-gegevens-toe.pl --- #! /usr/bin/perl -w use strict; my $debug = "yes"; my $wmldir = "../../data/wml"; my $physpropdir = "../../data/dadml/physicalprop/cml"; my $existsfile = "tmp-physprop-exists-"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; opendir (physpropDIR, $physpropdir) || die "$physpropdir: $!$/"; my @physpropfiles = grep {/\.xml/i} readdir(physpropDIR); closedir (physpropDIR) || die "$physpropdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("PhysicalProp"); # Loop over all files foreach my $physpropfile (@physpropfiles) { my $cas = $physpropfile; $cas =~ s/\.xml//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("PhysicalProp" => $cas)); if ($linkmatch) { $nr_already_linked++; print "physprop-link found:\t$linkmatch$/" if ($debug); } else { print "$cas.xml can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? 
"; #$_ = <STDIN>; #if (/^(j|y)/i) { my ($wmlfile) = split (":", $nummatch); print "Going to chage $wmldir/$wmlfile and add $cas...$/" if $debug; if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling physprop-file $cas.xml$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("PhysicalProp"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @physpropfiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : ", $nr_skipped_links,$/ if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; } else { print " Already linked matches : $nr_already_linked$/"; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingphysprops = @physpropfiles - $nr_foundCAS; print " Dangling files found : $nr_danglingphysprops$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/<\/ITEM/i) { push (@XML_data, " <INSERT MIME=\"chemical/cml\" CLASS=\"PhysicalProperties\">$cas.xml</INSERT>$/"); push 
(@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } print " about to store changed file...$/" if $debug; if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "INSERT-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-fysische-gegevens-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = "yes"; my $wmldir = "../../data/wml"; my $physpropdir = "../../data/dadml/physicalprop/cml"; my $existsfile = "tmp-physprop-exists-"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; opendir (physpropDIR, $physpropdir) || die "$physpropdir: $!$/"; my @physpropfiles = grep {/\.xml/i} readdir(physpropDIR); closedir (physpropDIR) || die "$physpropdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("PhysicalProp"); # Loop over all files foreach my $physpropfile (@physpropfiles) { my $cas = $physpropfile; $cas =~ s/\.xml//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("PhysicalProp" => $cas)); if ($linkmatch) { $nr_already_linked++; print "physprop-link found:\t$linkmatch$/" if ($debug); } else { print "$cas.xml can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? 
"; #$_ = <STDIN>; #if (/^(j|y)/i) { my ($wmlfile) = split (":", $nummatch); print "Going to chage $wmldir/$wmlfile and add $cas...$/" if $debug; if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling physprop-file $cas.xml$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("PhysicalProp"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @physpropfiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : ", $nr_skipped_links,$/ if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; } else { print " Already linked matches : $nr_already_linked$/"; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingphysprops = @physpropfiles - $nr_foundCAS; print " Dangling files found : $nr_danglingphysprops$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/<\/ITEM/i) { push (@XML_data, " <INSERT MIME=\"chemical/cml\" CLASS=\"PhysicalProperties\">$cas.xml</INSERT>$/"); push 
(@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } print " about to store changed file...$/" if $debug; if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "INSERT-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-gif-mime-toe.pl --- #! /usr/bin/perl -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $gifdir = "../../data/dadml/2d/gif"; my $existsfile = "tmp-gifmime-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; my $nr_dadmlpath_added = 0; opendir (GIFDIR, $gifdir) || die "$gifdir: $!$/"; my @giffiles = grep {/\.gif/i} readdir(GIFDIR); closedir (GIFDIR) || die "$gifdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("gif"); # Loop over all files foreach my $giffile (@giffiles) { my $cas = $giffile; $cas =~ s/\.gif//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { my ($wmlfile) = split (":", $nummatch); $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("gif" => $cas)); if ($linkmatch) { #print "$cas.gif already linked!!:\t$nummatch$/"; $nr_already_linked++; print "GIF-link found:\t$linkmatch$/" if ($debug); if ($linkmatch !~ /PATH/i) { #print "\tPATH-attribuut toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { &add_dadml_path ("$wmldir/$wmlfile", $cas); $nr_dadmlpath_added++; #} } } else { print "$cas.gif can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? 
"; #$_ = <STDIN>; #if (/^(j|y)/i) { if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling GIF-file $cas.gif$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("gif"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @giffiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : $nr_skipped_links$/" if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; } else { print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingGIFs = @giffiles - $nr_foundCAS; print " Dangling files found : $nr_danglingGIFs$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; my $has_WOC = `grep -i "<WOC>" $file`; my $has_DICT = `grep -i "<DICT>" $file` if (! $has_WOC); my $has_INDEX = `grep -i "<INDEX" $file` if (! 
$has_WOC); my $last_was_INDEX = ""; my $indent = ""; if (open (FILE, $file)) { if ($has_WOC) { while (<FILE>) { if (/^(\s*)<WOC>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } elsif ($has_INDEX) { while (<FILE>) { if (/^(\s*)<INDEX/i) { $indent = $1; push (@XML_data, $_); $last_was_INDEX = "True"; } elsif ($last_was_INDEX && ! /<INDEX/i) { push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); push (@XML_data, $_); $changed = "True"; $last_was_INDEX = ""; } else { push (@XML_data, $_); } } } elsif ($has_DICT) { while (<FILE>) { if (/^(\s*)<\/DICT>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } else { while (<FILE>) { if (/^(\s*)<ITEM/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "MEDIA-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } sub add_dadml_path { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/MEDIA.*image\/gif.*$cas/i) { s/MIME=\"image\/gif\">/MIME=\"image\/gif\" PATH=\"dadml\">/ig; 
push (@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "PATH-attribute added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-gif-mime-toe.pl.in --- #! @PATHTOPERL@ -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $gifdir = "../../data/dadml/2d/gif"; my $existsfile = "tmp-gifmime-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; my $nr_dadmlpath_added = 0; opendir (GIFDIR, $gifdir) || die "$gifdir: $!$/"; my @giffiles = grep {/\.gif/i} readdir(GIFDIR); closedir (GIFDIR) || die "$gifdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("gif"); # Loop over all files foreach my $giffile (@giffiles) { my $cas = $giffile; $cas =~ s/\.gif//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { my ($wmlfile) = split (":", $nummatch); $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("gif" => $cas)); if ($linkmatch) { #print "$cas.gif already linked!!:\t$nummatch$/"; $nr_already_linked++; print "GIF-link found:\t$linkmatch$/" if ($debug); if ($linkmatch !~ /PATH/i) { #print "\tPATH-attribuut toevoegen aan wml? "; #$_ = <STDIN>; #if (/^(j|y)/i) { &add_dadml_path ("$wmldir/$wmlfile", $cas); $nr_dadmlpath_added++; #} } } else { print "$cas.gif can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? 
"; #$_ = <STDIN>; #if (/^(j|y)/i) { if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling GIF-file $cas.gif$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("gif"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @giffiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : $nr_skipped_links$/" if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; } else { print " Already linked matches : $nr_already_linked$/"; print " PATH-attibutes added : $nr_dadmlpath_added$/" if $nr_dadmlpath_added; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingGIFs = @giffiles - $nr_foundCAS; print " Dangling files found : $nr_danglingGIFs$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; my $has_WOC = `grep -i "<WOC>" $file`; my $has_DICT = `grep -i "<DICT>" $file` if (! $has_WOC); my $has_INDEX = `grep -i "<INDEX" $file` if (! 
$has_WOC); my $last_was_INDEX = ""; my $indent = ""; if (open (FILE, $file)) { if ($has_WOC) { while (<FILE>) { if (/^(\s*)<WOC>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } elsif ($has_INDEX) { while (<FILE>) { if (/^(\s*)<INDEX/i) { $indent = $1; push (@XML_data, $_); $last_was_INDEX = "True"; } elsif ($last_was_INDEX && ! /<INDEX/i) { push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); push (@XML_data, $_); $changed = "True"; $last_was_INDEX = ""; } else { push (@XML_data, $_); } } } elsif ($has_DICT) { while (<FILE>) { if (/^(\s*)<\/DICT>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } else { while (<FILE>) { if (/^(\s*)<ITEM/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"image/gif\" PATH=\"dadml\">$cas.gif</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "MEDIA-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } sub add_dadml_path { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; if (open (FILE, $file)) { while (<FILE>) { if (/MEDIA.*image\/gif.*$cas/i) { s/MIME=\"image\/gif\">/MIME=\"image\/gif\" PATH=\"dadml\">/ig; 
push (@XML_data, $_); $changed = "True"; } else { push (@XML_data, $_); } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "PATH-attribute added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-pdb-mime-toe.pl --- #! /usr/bin/perl -w use strict; my $debug = ""; my $wmldir = "../../data/wml"; my $pdbdir = "../../data/dadml/3d/pdb"; my $existsfile = "tmp-pdbmime-exists"; # tellers my $nr_foundCAS = 0; my $nr_already_linked = 0; my $nr_gelinkt = 0; my $nr_mislukte_links = 0; opendir (PDBDIR, $pdbdir) || die "$pdbdir: $!$/"; my @pdbfiles = grep {/\.pdb/i} readdir(PDBDIR); closedir (PDBDIR) || die "$pdbdir: $!$/"; &create_existskeyfile ("NUMBER"); &create_existskeyfile ("x-pdb"); # Loop over all files foreach my $pdbfile (@pdbfiles) { my $cas = $pdbfile; $cas =~ s/\.pdb//ig; my $nummatch = &exists (("NUMBER" => $cas)); if ($nummatch) { $nr_foundCAS++; print "NUMBER found:\t$nummatch$/" if ($debug); my $linkmatch = &exists (("x-pdb" => $cas)); if ($linkmatch) { $nr_already_linked++; print "PDB-link found:\t$linkmatch$/" if ($debug); } else { print "$cas.pdb can be linked!!:\t$nummatch$/"; #print "\tMEDIA-tag toevoegen aan wml? 
"; #$_ = <STDIN>; #if (/^(j|y)/i) { my ($wmlfile) = split (":", $nummatch); if (&voeg_link_toe("$wmldir/$wmlfile", $cas)) { $nr_gelinkt++; } else { $nr_mislukte_links++; } #} } } else { print "Dangling PDB-file $cas.pdb$/"; } } &remove_existskeyfile ("NUMBER"); &remove_existskeyfile ("x-pdb"); # Print statistics print " ----------------------------$/"; print "Files checked : ",scalar @pdbfiles, $/; if ($nr_foundCAS) { print " CAS or WOC number found : $nr_foundCAS$/"; my $nr_linkable = $nr_foundCAS - $nr_already_linked; if ($nr_linkable) { print " Linkable matches : ", $nr_foundCAS - $nr_already_linked,$/; print " Added links : $nr_gelinkt$/" if $nr_gelinkt; print " Links failed to add : $nr_mislukte_links$/" if $nr_mislukte_links; my $nr_skipped_links = $nr_linkable - $nr_gelinkt - $nr_mislukte_links; print " Skipped links : ", $nr_skipped_links,$/ if $nr_skipped_links; print " Already linked matches : $nr_already_linked$/"; } else { print " Already linked matches : $nr_already_linked$/"; print " No linkable matches...$/"; } } else { print "Not one file name matched an item name...$/"; } my $nr_danglingPDBs = @pdbfiles - $nr_foundCAS; print " Dangling files found : $nr_danglingPDBs$/"; sub create_existskeyfile { my $key = shift; `grep -i $key $wmldir/*.xml > $existsfile$key`; } sub remove_existskeyfile { my $key = shift; `rm $existsfile$key`; } sub exists { my %filters = @_; foreach my $key (keys %filters) { my $out = `grep -i $filters{$key} $existsfile$key`; if ($out) { chomp ($out); $out =~ s/$wmldir\///ig; $out =~ s/:\s*/:\t/ig; return "$out"; } } return ""; } sub voeg_link_toe { my $file = shift; my $cas = shift; my $succes = "True"; my @XML_data; my $changed = ""; print "Processing $file "; my $has_WOC = `grep -i "<WOC>" $file`; my $has_DICT = `grep -i "<DICT>" $file` if (! $has_WOC); my $has_INDEX = `grep -i "<INDEX" $file` if (! 
$has_WOC); my $last_was_INDEX = ""; my $indent = ""; if (open (FILE, $file)) { if ($has_WOC) { while (<FILE>) { if (/^(\s*)<WOC>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } elsif ($has_INDEX) { while (<FILE>) { if (/^(\s*)<INDEX/i) { $indent = $1; push (@XML_data, $_); $last_was_INDEX = "True"; } elsif ($last_was_INDEX && ! /<INDEX/i) { push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); push (@XML_data, $_); $changed = "True"; $last_was_INDEX = ""; } else { push (@XML_data, $_); } } } elsif ($has_DICT) { while (<FILE>) { if (/^(\s*)<\/DICT>/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } else { while (<FILE>) { if (/^(\s*)<ITEM/i) { $indent = $1; push (@XML_data, $_); push (@XML_data, "$indent<WOC>$/"); push (@XML_data, "$indent <MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>$/"); push (@XML_data, "$indent</WOC>$/"); $changed = "True"; } else { push (@XML_data, $_); } } } close (FILE) || warn "$file: $!$/"; } else { warn "$file: $!$/"; $succes = ""; } if ($changed) { if (open (FILE, ">$file")) { foreach my $line (@XML_data) { print FILE $line; } close (FILE) || die $!; print "MEDIA-tag added...$/"; } else { warn "$file: $!$/"; print "Unable to change...$/"; $succes = ""; } } else { print "No changes...$/"; } return ($succes); } --- NEW FILE: voeg-pdb-mime-toe.pl.in --- #! 
@PATHTOPERL@ -w
# Link PDB files to WML items.  For every <name>.pdb in $pdbdir whose name
# occurs on a "NUMBER" line of a WML file, a
# <MEDIA MIME="chemical/x-pdb"> element is added to that WML file, unless
# an "x-pdb" line somewhere in the WML files already mentions the name.
use strict;
use warnings;

my $debug  = "";
my $wmldir = "../../data/wml";
my $pdbdir = "../../data/dadml/3d/pdb";

# Search index that replaces the former grep-generated temp files:
# search key => reference to a list of "file.xml:line" strings.
my %matches_for;

# Counters
my $nr_foundCAS       = 0;
my $nr_already_linked = 0;
my $nr_gelinkt        = 0;
my $nr_mislukte_links = 0;

opendir(my $pdb_dh, $pdbdir) or die "$pdbdir: $!\n";
# Anchor the extension so that e.g. "foo.pdb.bak" is not picked up.
my @pdbfiles = grep { /\.pdb\z/i } readdir($pdb_dh);
closedir($pdb_dh) or die "$pdbdir: $!\n";

create_existskeyfile("NUMBER");
create_existskeyfile("x-pdb");

# Loop over all files
foreach my $pdbfile (@pdbfiles) {
    (my $cas = $pdbfile) =~ s/\.pdb\z//i;

    # "exists" is also a Perl builtin, so the sub has to be called with "&".
    my $nummatch = &exists("NUMBER" => $cas);
    if (!$nummatch) {
        print "Dangling PDB-file $cas.pdb\n";
        next;
    }
    $nr_foundCAS++;
    print "NUMBER found:\t$nummatch\n" if $debug;

    my $linkmatch = &exists("x-pdb" => $cas);
    if ($linkmatch) {
        $nr_already_linked++;
        print "PDB-link found:\t$linkmatch\n" if $debug;
        next;
    }

    print "$cas.pdb can be linked!!:\t$nummatch\n";
    # $nummatch may list several hits; the first one names the file to edit.
    my ($wmlfile) = split /:/, $nummatch;
    if (voeg_link_toe("$wmldir/$wmlfile", $cas)) {
        $nr_gelinkt++;
    }
    else {
        $nr_mislukte_links++;
    }
}

remove_existskeyfile("NUMBER");
remove_existskeyfile("x-pdb");

# Print statistics
print " ----------------------------\n";
print "Files checked : ", scalar @pdbfiles, "\n";
if ($nr_foundCAS) {
    print " CAS or WOC number found : $nr_foundCAS\n";
    my $nr_linkable = $nr_foundCAS - $nr_already_linked;
    if ($nr_linkable) {
        print " Linkable matches : $nr_linkable\n";
        print " Added links : $nr_gelinkt\n" if $nr_gelinkt;
        print " Links failed to add : $nr_mislukte_links\n"
            if $nr_mislukte_links;
        my $nr_skipped_links
            = $nr_linkable - $nr_gelinkt - $nr_mislukte_links;
        print " Skipped links : $nr_skipped_links\n" if $nr_skipped_links;
        print " Already linked matches : $nr_already_linked\n";
    }
    else {
        print " Already linked matches : $nr_already_linked\n";
        print " No linkable matches...\n";
    }
}
else {
    print "Not one file name matched an item name...\n";
}
my $nr_danglingPDBs = @pdbfiles - $nr_foundCAS;
print " Dangling files found : $nr_danglingPDBs\n";

# Index every line of $wmldir/*.xml that contains $key (literal,
# case-insensitive) under $matches_for{$key}.  Entries look like
# "file.xml:line".  The index is built once and is not refreshed when
# voeg_link_toe() rewrites a file.
sub create_existskeyfile {
    my ($key) = @_;
    my @hits;
    foreach my $path (glob "$wmldir/*.xml") {
        (my $name = $path) =~ s{\A.*/}{};
        my $fh;
        unless (open($fh, '<', $path)) {
            warn "$path: $!\n";
            next;
        }
        while (my $line = <$fh>) {
            chomp $line;
            push @hits, "$name:$line" if $line =~ /\Q$key\E/i;
        }
        close($fh) or warn "$path: $!\n";
    }
    $matches_for{$key} = \@hits;
    return;
}

# Drop the index built by create_existskeyfile() for $key.
sub remove_existskeyfile {
    my ($key) = @_;
    delete $matches_for{$key};
    return;
}

# Look up values in the indexes.  Takes key => value pairs and, for the
# first key with hits, returns every indexed "file.xml:line" entry that
# contains the value (literal, case-insensitive substring), joined by
# newlines and with ":<whitespace>" normalised to ":<tab>".  Returns ""
# when nothing matches.
# NOTE(review): substring matching means "64-17-5" also hits "164-17-5";
# tighten once the exact layout of the NUMBER lines is confirmed.
sub exists {
    my %filters = @_;
    foreach my $key (keys %filters) {
        my $wanted = $filters{$key};
        my @hits = grep { /\Q$wanted\E/i } @{ $matches_for{$key} || [] };
        next unless @hits;
        my $out = join "\n", @hits;
        $out =~ s/:\s*/:\t/g;
        return $out;
    }
    return "";
}

# Lines of a new <WOC> element holding the MEDIA tag $media, at $indent.
sub _woc_lines {
    my ($indent, $media) = @_;
    return ("$indent<WOC>\n", "$indent $media\n", "$indent</WOC>\n");
}

# Add <MEDIA MIME="chemical/x-pdb">$cas.pdb</MEDIA> to WML file $file.
# The insertion point depends on what the file contains; the first
# applicable rule wins:
#   1. <WOC> present   : after each line starting with <WOC>
#   2. <INDEX present  : new <WOC> element after each run of <INDEX lines
#   3. <DICT> present  : new <WOC> element after each </DICT> line
#   4. otherwise       : new <WOC> element after each <ITEM line
# Returns "True" when the tag was written and "" otherwise.  A file in
# which no insertion point was found now counts as a failure; it used to
# be reported as a successfully added link.
sub voeg_link_toe {
    my ($file, $cas) = @_;

    print "Processing $file ";

    my $in;
    unless (open($in, '<', $file)) {
        warn "$file: $!\n";
        print "No changes...\n";
        return "";
    }
    my @lines = <$in>;
    close($in) or warn "$file: $!\n";

    # Plain assignments: the former "my $x = ... if (...)" construct has
    # undefined behaviour and could leak a value from a previous call.
    my $mode = (grep { /<WOC>/i }  @lines) ? 'WOC'
             : (grep { /<INDEX/i } @lines) ? 'INDEX'
             : (grep { /<DICT>/i } @lines) ? 'DICT'
             :                               'ITEM';

    my $media          = "<MEDIA MIME=\"chemical/x-pdb\">$cas.pdb</MEDIA>";
    my $indent         = "";
    my $last_was_INDEX = 0;
    my $changed        = 0;
    my @XML_data;

    foreach my $line (@lines) {
        if ($mode eq 'WOC') {
            push @XML_data, $line;
            if ($line =~ /^(\s*)<WOC>/i) {
                push @XML_data, "$1 $media\n";
                $changed = 1;
            }
        }
        elsif ($mode eq 'INDEX') {
            if ($line =~ /^(\s*)<INDEX/i) {
                $indent         = $1;
                $last_was_INDEX = 1;
            }
            elsif ($last_was_INDEX && $line !~ /<INDEX/i) {
                # First line after a run of INDEX lines.
                push @XML_data, _woc_lines($indent, $media);
                $changed        = 1;
                $last_was_INDEX = 0;
            }
            push @XML_data, $line;
        }
        else {
            push @XML_data, $line;
            my $anchor = $mode eq 'DICT' ? qr{^(\s*)</DICT>}i
                                         : qr{^(\s*)<ITEM}i;
            if ($line =~ $anchor) {
                push @XML_data, _woc_lines($1, $media);
                $changed = 1;
            }
        }
    }

    if (!$changed) {
        print "No changes...\n";
        return "";
    }

    my $out;
    unless (open($out, '>', $file)) {
        warn "$file: $!\n";
        print "Unable to change...\n";
        return "";
    }
    print {$out} @XML_data;
    close($out) or die "$file: $!\n";
    print "MEDIA-tag added...\n";
    return "True";
}
--- NEW FILE: voeg-woc-nummer-toe.pl ---
#! /usr/bin/perl -w
# Give every <ITEM> start tag in $wmldir/*.xml that has no ID attribute a
# unique ID="WOCnnnnnnnn", numbering on from the highest WOC ID in use.
use strict;
use warnings;

my $wmldir   = "../../data/wml";
my @wmlfiles = glob "$wmldir/*.xml";

# Pass 1: find the highest WOC number currently in use.  The comparison
# is numeric, so IDs that are not zero-padded are handled as well.
my $highestid = 0;
foreach my $file (@wmlfiles) {
    my $fh;
    unless (open($fh, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    while (my $line = <$fh>) {
        next unless $line =~ /ITEM/i;
        while ($line =~ /ID="WOC(\d+)"/ig) {
            $highestid = $1 if $1 > $highestid;
        }
    }
    close($fh) or warn "$file: $!\n";
}

my $nextfree = sprintf "%08i", $highestid + 1;
print "First free: WOC$nextfree\n";

# Pass 2: add an ID to every ITEM start tag that lacks one.  Only start
# tags that are closed on the same line are considered.
foreach my $file (@wmlfiles) {
    my $in;
    unless (open($in, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    my @lines = <$in>;
    close($in) or warn "$file: $!\n";

    my $mustbeupdated = 0;
    foreach my $line (@lines) {
        next unless $line =~ /<ITEM(?=[\s>])([^>]*)>/;
        my $attributes = $1;
        if ($attributes =~ /\bID="WOC(.*?)"/) {
            print "ID found in $file: $1\n";
        }
        elsif ($attributes =~ /\bID=/) {
            # A second ID attribute would make the file ill-formed.
            warn "$file: ITEM has a non-WOC ID, left unchanged\n";
        }
        else {
            # Substitute in place so indentation and any text around
            # the tag survive.
            $line =~ s/<ITEM(?=[\s>])/<ITEM ID="WOC$nextfree"/;
            $nextfree = sprintf "%08i", $nextfree + 1;
            $mustbeupdated = 1;
        }
    }
    next unless $mustbeupdated;

    print "Updating $file...\n";
    my $out;
    unless (open($out, '>', $file)) {
        warn "$file: $!\n";
        next;
    }
    print {$out} @lines;
    close($out) or die "$file: $!\n";
}
--- NEW FILE: voeg-woc-nummer-toe.pl.in ---
#! @PATHTOPERL@ -w
# Give every <ITEM> start tag in $wmldir/*.xml that has no ID attribute a
# unique ID="WOCnnnnnnnn", numbering on from the highest WOC ID in use.
use strict;
use warnings;

my $wmldir   = "../../data/wml";
my @wmlfiles = glob "$wmldir/*.xml";

# Pass 1: find the highest WOC number currently in use.  The comparison
# is numeric, so IDs that are not zero-padded are handled as well.
my $highestid = 0;
foreach my $file (@wmlfiles) {
    my $fh;
    unless (open($fh, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    while (my $line = <$fh>) {
        next unless $line =~ /ITEM/i;
        while ($line =~ /ID="WOC(\d+)"/ig) {
            $highestid = $1 if $1 > $highestid;
        }
    }
    close($fh) or warn "$file: $!\n";
}

my $nextfree = sprintf "%08i", $highestid + 1;
print "First free: WOC$nextfree\n";

# Pass 2: add an ID to every ITEM start tag that lacks one.  Only start
# tags that are closed on the same line are considered.
foreach my $file (@wmlfiles) {
    my $in;
    unless (open($in, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    my @lines = <$in>;
    close($in) or warn "$file: $!\n";

    my $mustbeupdated = 0;
    foreach my $line (@lines) {
        next unless $line =~ /<ITEM(?=[\s>])([^>]*)>/;
        my $attributes = $1;
        if ($attributes =~ /\bID="WOC(.*?)"/) {
            print "ID found in $file: $1\n";
        }
        elsif ($attributes =~ /\bID=/) {
            # A second ID attribute would make the file ill-formed.
            warn "$file: ITEM has a non-WOC ID, left unchanged\n";
        }
        else {
            # Substitute in place so indentation and any text around
            # the tag survive.
            $line =~ s/<ITEM(?=[\s>])/<ITEM ID="WOC$nextfree"/;
            $nextfree = sprintf "%08i", $nextfree + 1;
            $mustbeupdated = 1;
        }
    }
    next unless $mustbeupdated;

    print "Updating $file...\n";
    my $out;
    unless (open($out, '>', $file)) {
        warn "$file: $!\n";
        next;
    }
    print {$out} @lines;
    close($out) or die "$file: $!\n";
}
--- NEW FILE: voeg_stylesheet_toe.pl ---
#! /usr/bin/perl -w
# Put an xml-stylesheet processing instruction in front of the first
# <molecule line of every $dir/*.xml file that has no such instruction.
use strict;
use warnings;

my $dir    = "../../data/dadml/fysisch/cml";
my @files  = glob "$dir/*.xml";
my $insert = q{<?xml-stylesheet type="text/xsl" href="http://www.sci.kun.nl/woc/data/dadml/fysisch/cml/cml.xsl" ?>} . "\n";

foreach my $file (@files) {
    my $in;
    unless (open($in, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    my @lines = <$in>;
    close($in) or warn "$file: $!\n";

    # Nothing to do when the file already carries a stylesheet.
    next if grep { /<\?xml-stylesheet/ } @lines;

    my @inputfile;
    my $inserted = 0;
    foreach my $line (@lines) {
        # Insert once only, also when the file holds several
        # <molecule lines.
        if (!$inserted && $line =~ /<molecule/i) {
            push @inputfile, $insert;
            $inserted = 1;
        }
        push @inputfile, $line;
    }
    unless ($inserted) {
        warn "$file: no <molecule line found, left unchanged\n";
        next;
    }

    print "Updating $file...\n";
    my $out;
    unless (open($out, '>', $file)) {
        warn "$file: $!\n";
        next;
    }
    print {$out} @inputfile;
    close($out) or die "$file: $!\n";
}
--- NEW FILE: voeg_stylesheet_toe.pl.in ---
#! @PATHTOPERL@ -w
# Put an xml-stylesheet processing instruction in front of the first
# <molecule line of every $dir/*.xml file that has no such instruction.
use strict;
use warnings;

my $dir    = "../../data/dadml/fysisch/cml";
my @files  = glob "$dir/*.xml";
my $insert = q{<?xml-stylesheet type="text/xsl" href="http://www.sci.kun.nl/woc/data/dadml/fysisch/cml/cml.xsl" ?>} . "\n";

foreach my $file (@files) {
    my $in;
    unless (open($in, '<', $file)) {
        warn "$file: $!\n";
        next;
    }
    my @lines = <$in>;
    close($in) or warn "$file: $!\n";

    # Nothing to do when the file already carries a stylesheet.
    next if grep { /<\?xml-stylesheet/ } @lines;

    my @inputfile;
    my $inserted = 0;
    foreach my $line (@lines) {
        # Insert once only, also when the file holds several
        # <molecule lines.
        if (!$inserted && $line =~ /<molecule/i) {
            push @inputfile, $insert;
            $inserted = 1;
        }
        push @inputfile, $line;
    }
    unless ($inserted) {
        warn "$file: no <molecule line found, left unchanged\n";
        next;
    }

    print "Updating $file...\n";
    my $out;
    unless (open($out, '>', $file)) {
        warn "$file: $!\n";
        next;
    }
    print {$out} @inputfile;
    close($out) or die "$file: $!\n";
}
--- NEW FILE: zet-fysisch-props-op-extensie-xml.pl --- #! 
/usr/bin/perl -w
# Change <INSERT ...>name.cml</INSERT> references in the given WML files
# into <INSERT MIME="chemical/cml" CLASS="PhysicalProperties">name.xml</INSERT>.
use strict;
use warnings;

my $nr_changed = 0;

# Loop over the files named on the command line.  Calling the script with
# ../wml/*.xml lets the shell expand the list (../wml/azijnzuur.xml etc.).
foreach my $wmlfile (@ARGV) {
    print "Reading $wmlfile... ";
    open(my $in, '<', $wmlfile) or die "$wmlfile: $!\n";

    my @file;
    my $change_needed = 0;
    while (my $line = <$in>) {
        if ($line =~ m{<INSERT.*?>(.*?)\.cml</INSERT>}) {
            my $casnummer = $1;
            print "new extension necessary ($casnummer, $line)...";
            # Replace the element only, so the indentation and the rest
            # of the line are kept.
            $line =~ s{<INSERT.*?>.*?\.cml</INSERT>}
                      {<INSERT MIME="chemical/cml" CLASS="PhysicalProperties">$casnummer.xml</INSERT>};
            $change_needed = 1;
        }
        push @file, $line;
    }
    close($in);

    if (!$change_needed) {
        print "Nothing done!\n";
        next;
    }

    # Write back to the file that was read; the script used to write to
    # "$wmldir/$wmlfile", which is a different path for any argument that
    # carries a directory part.
    open(my $out, '>', $wmlfile) or die "File $wmlfile not writeable!\n";
    print {$out} @file;
    close($out) or die "$wmlfile: $!\n";
    print "Changed!\n";
    $nr_changed++;
}

print "-----------------\n";
print "Files changed: $nr_changed\n";
--- NEW FILE: zet-fysisch-props-op-extensie-xml.pl.in ---
#! @PATHTOPERL@ -w
# Change <INSERT ...>name.cml</INSERT> references in the given WML files
# into <INSERT MIME="chemical/cml" CLASS="PhysicalProperties">name.xml</INSERT>.
use strict;
use warnings;

my $nr_changed = 0;

# Loop over the files named on the command line.  Calling the script with
# ../wml/*.xml lets the shell expand the list (../wml/azijnzuur.xml etc.).
foreach my $wmlfile (@ARGV) {
    print "Reading $wmlfile... ";
    open(my $in, '<', $wmlfile) or die "$wmlfile: $!\n";

    my @file;
    my $change_needed = 0;
    while (my $line = <$in>) {
        if ($line =~ m{<INSERT.*?>(.*?)\.cml</INSERT>}) {
            my $casnummer = $1;
            print "new extension necessary ($casnummer, $line)...";
            # Replace the element only, so the indentation and the rest
            # of the line are kept.
            $line =~ s{<INSERT.*?>.*?\.cml</INSERT>}
                      {<INSERT MIME="chemical/cml" CLASS="PhysicalProperties">$casnummer.xml</INSERT>};
            $change_needed = 1;
        }
        push @file, $line;
    }
    close($in);

    if (!$change_needed) {
        print "Nothing done!\n";
        next;
    }

    # Write back to the file that was read; the script used to write to
    # "$wmldir/$wmlfile", which is a different path for any argument that
    # carries a directory part.
    open(my $out, '>', $wmlfile) or die "File $wmlfile not writeable!\n";
    print {$out} @file;
    close($out) or die "$wmlfile: $!\n";
    print "Changed!\n";
    $nr_changed++;
}

print "-----------------\n";
print "Files changed: $nr_changed\n";
--- NEW FILE: zet-wmls-op-ISO-8859-1-encoding.pl ---
#! /usr/bin/perl -w
# Replace the XML declaration of the given WML files by one that states
# encoding="ISO-8859-1".  Only the declaration is rewritten; the file
# contents are not transcoded.
use strict;
use warnings;

my $nr_changed = 0;

# Loop over the files named on the command line.  Calling the script with
# ../wml/*.xml lets the shell expand the list (../wml/azijnzuur.xml etc.).
foreach my $wmlfile (@ARGV) {
    print "Reading $wmlfile... ";

    my $in;
    unless (open($in, '<', $wmlfile)) {
        warn "$wmlfile: $!\n";
        print "Nothing done!\n";
        next;
    }

    my @file;
    my $change_needed = 0;
    while (my $line = <$in>) {
        # "\s" after "xml" keeps other processing instructions such as
        # <?xml-stylesheet ...?> from being mistaken for the declaration.
        if ($line =~ /<\?xml\s.*?\?>/ && $line !~ /ISO-8859-1/) {
            print "new encoding necessary... ";
            # unshift rather than push, so that the declaration is
            # guaranteed to be the first line of the file.
            unshift @file,
                "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
            # Keep anything that followed the declaration on its line.
            (my $rest = $line) =~ s/\A.*?<\?xml\s.*?\?>\s*//s;
            push @file, $rest if length $rest;
            $change_needed = 1;
        }
        else {
            push @file, $line;
        }
    }
    close($in);

    if (!$change_needed) {
        print "Nothing done!\n";
        next;
    }

    # Write back to the file that was read; the script used to write to
    # "$wmldir/$wmlfile", which is a different path for any argument that
    # carries a directory part.
    my $out;
    unless (open($out, '>', $wmlfile)) {
        warn "File $wmlfile not writeable!\n";
        next;
    }
    print {$out} @file;
    close($out) or die "$wmlfile: $!\n";
    print "Changed!\n";
    $nr_changed++;
}

print "-----------------\n";
print "Files changed: $nr_changed\n";
--- NEW FILE: zet-wmls-op-ISO-8859-1-encoding.pl.in ---
#! @PATHTOPERL@ -w
# Replace the XML declaration of the given WML files by one that states
# encoding="ISO-8859-1".  Only the declaration is rewritten; the file
# contents are not transcoded.
use strict;
use warnings;

my $nr_changed = 0;

# Loop over the files named on the command line.  Calling the script with
# ../wml/*.xml lets the shell expand the list (../wml/azijnzuur.xml etc.).
foreach my $wmlfile (@ARGV) {
    print "Reading $wmlfile... ";

    my $in;
    unless (open($in, '<', $wmlfile)) {
        warn "$wmlfile: $!\n";
        print "Nothing done!\n";
        next;
    }

    my @file;
    my $change_needed = 0;
    while (my $line = <$in>) {
        # "\s" after "xml" keeps other processing instructions such as
        # <?xml-stylesheet ...?> from being mistaken for the declaration.
        if ($line =~ /<\?xml\s.*?\?>/ && $line !~ /ISO-8859-1/) {
            print "new encoding necessary... ";
            # unshift rather than push, so that the declaration is
            # guaranteed to be the first line of the file.
            unshift @file,
                "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
            # Keep anything that followed the declaration on its line.
            (my $rest = $line) =~ s/\A.*?<\?xml\s.*?\?>\s*//s;
            push @file, $rest if length $rest;
            $change_needed = 1;
        }
        else {
            push @file, $line;
        }
    }
    close($in);

    if (!$change_needed) {
        print "Nothing done!\n";
        next;
    }

    # Write back to the file that was read; the script used to write to
    # "$wmldir/$wmlfile", which is a different path for any argument that
    # carries a directory part.
    my $out;
    unless (open($out, '>', $wmlfile)) {
        warn "File $wmlfile not writeable!\n";
        next;
    }
    print {$out} @file;
    close($out) or die "$wmlfile: $!\n";
    print "Changed!\n";
    $nr_changed++;
}

print "-----------------\n";
print "Files changed: $nr_changed\n";
--- NEW FILE: zoek-cas-bij-pdbs-voor-dadml.pl ---
#! /usr/bin/perl -w
# For every <name>.pdb in $pdbdir, look for a WML file with a "LANG" line
# that mentions <name> and report the CAS number(s) found in that file.
use strict;
use warnings;

my $debug  = "";
my $wmldir = "../../data/wml";
my $pdbdir = "../../data/compounds/pdb/new";

# Search index that replaces the former grep-generated temp file:
# search key => reference to a list of "file.xml:line" strings.
my %matches_for;

opendir(my $pdb_dh, $pdbdir) or die "$pdbdir: $!\n";
# Anchor the extension so that e.g. "foo.pdb.bak" is not picked up.
my @pdbfiles = grep { /\.pdb\z/i } readdir($pdb_dh);
closedir($pdb_dh) or die "$pdbdir: $!\n";

create_existskeyfile("LANG");

foreach my $pdbfile (@pdbfiles) {
    (my $name = lc $pdbfile) =~ s/\.pdb\z//;

    # "exists" is also a Perl builtin, so the sub has to be called with "&".
    my $namematch = &exists("LANG" => $name);
    if (!$namematch) {
        print "Dangling PDB-file $name.pdb\n" if $debug;
        next;
    }
    print "NAME match for $name.pdb:\t$namematch\n" if $debug;

    # $namematch may list several hits; the first one names the file.
    my ($wmlfile) = split /:/, $namematch;
    $wmlfile =~ s/^\s*//;
    print "WMLFILE:$wmlfile.\n" if $debug;

    my @cas_numbers = _cas_numbers_in("$wmldir/$wmlfile");
    if (@cas_numbers) {
        # Several CAS lines used to be glued together without separator.
        my $cas = join ", ", @cas_numbers;
        print "CAS number $cas\tfound for $name.pdb\tin $wmlfile\n";
    }
    else {
        print "no CAS number found for $name.pdb in $wmlfile\n" if $debug;
    }
}

remove_existskeyfile("LANG");

# Return the lines of file $path that contain "CAS-NUMBER"
# (case-insensitive), with the <INDEX ...> and </INDEX> tags and the
# whitespace around them removed.  Returns an empty list when the file
# cannot be read or has no such line.
sub _cas_numbers_in {
    my ($path) = @_;
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "$path: $!\n";
        return;
    }
    my @found;
    while (my $line = <$fh>) {
        next unless $line =~ /CAS-NUMBER/i;
        chomp $line;
        $line =~ s/\s*<INDEX.*?>\s*//ig;
        $line =~ s/\s*<\/INDEX.*?>\s*//ig;
        push @found, $line;
    }
    close($fh) or warn "$path: $!\n";
    return @found;
}

# Index every line of $wmldir/*.xml that contains $key (literal,
# case-insensitive) under $matches_for{$key}.  Entries look like
# "file.xml:line".
sub create_existskeyfile {
    my ($key) = @_;
    my @hits;
    foreach my $path (glob "$wmldir/*.xml") {
        (my $name = $path) =~ s{\A.*/}{};
        my $fh;
        unless (open($fh, '<', $path)) {
            warn "$path: $!\n";
            next;
        }
        while (my $line = <$fh>) {
            chomp $line;
            push @hits, "$name:$line" if $line =~ /\Q$key\E/i;
        }
        close($fh) or warn "$path: $!\n";
    }
    $matches_for{$key} = \@hits;
    return;
}

# Drop the index built by create_existskeyfile() for $key.
sub remove_existskeyfile {
    my ($key) = @_;
    delete $matches_for{$key};
    return;
}

# Look up values in the indexes.  Takes key => value pairs and, for the
# first key with hits, returns every indexed "file.xml:line" entry that
# contains the value (literal, case-insensitive substring), joined by
# newlines and with ":<whitespace>" normalised to ":<tab>".  Returns ""
# when nothing matches.
sub exists {
    my %filters = @_;
    foreach my $key (keys %filters) {
        my $wanted = $filters{$key};
        my @hits = grep { /\Q$wanted\E/i } @{ $matches_for{$key} || [] };
        next unless @hits;
        my $out = join "\n", @hits;
        $out =~ s/:\s*/:\t/g;
        return $out;
    }
    return "";
}
--- NEW FILE: zoek-cas-bij-pdbs-voor-dadml.pl.in ---
#! @PATHTOPERL@ -w
# For every <name>.pdb in $pdbdir, look for a WML file with a "LANG" line
# that mentions <name> and report the CAS number(s) found in that file.
use strict;
use warnings;

my $debug  = "";
my $wmldir = "../../data/wml";
my $pdbdir = "../../data/compounds/pdb/new";

# Search index that replaces the former grep-generated temp file:
# search key => reference to a list of "file.xml:line" strings.
my %matches_for;

opendir(my $pdb_dh, $pdbdir) or die "$pdbdir: $!\n";
# Anchor the extension so that e.g. "foo.pdb.bak" is not picked up.
my @pdbfiles = grep { /\.pdb\z/i } readdir($pdb_dh);
closedir($pdb_dh) or die "$pdbdir: $!\n";

create_existskeyfile("LANG");

foreach my $pdbfile (@pdbfiles) {
    (my $name = lc $pdbfile) =~ s/\.pdb\z//;

    # "exists" is also a Perl builtin, so the sub has to be called with "&".
    my $namematch = &exists("LANG" => $name);
    if (!$namematch) {
        print "Dangling PDB-file $name.pdb\n" if $debug;
        next;
    }
    print "NAME match for $name.pdb:\t$namematch\n" if $debug;

    # $namematch may list several hits; the first one names the file.
    my ($wmlfile) = split /:/, $namematch;
    $wmlfile =~ s/^\s*//;
    print "WMLFILE:$wmlfile.\n" if $debug;

    my @cas_numbers = _cas_numbers_in("$wmldir/$wmlfile");
    if (@cas_numbers) {
        # Several CAS lines used to be glued together without separator.
        my $cas = join ", ", @cas_numbers;
        print "CAS number $cas\tfound for $name.pdb\tin $wmlfile\n";
    }
    else {
        print "no CAS number found for $name.pdb in $wmlfile\n" if $debug;
    }
}

remove_existskeyfile("LANG");

# Return the lines of file $path that contain "CAS-NUMBER"
# (case-insensitive), with the <INDEX ...> and </INDEX> tags and the
# whitespace around them removed.  Returns an empty list when the file
# cannot be read or has no such line.
sub _cas_numbers_in {
    my ($path) = @_;
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "$path: $!\n";
        return;
    }
    my @found;
    while (my $line = <$fh>) {
        next unless $line =~ /CAS-NUMBER/i;
        chomp $line;
        $line =~ s/\s*<INDEX.*?>\s*//ig;
        $line =~ s/\s*<\/INDEX.*?>\s*//ig;
        push @found, $line;
    }
    close($fh) or warn "$path: $!\n";
    return @found;
}

# Index every line of $wmldir/*.xml that contains $key (literal,
# case-insensitive) under $matches_for{$key}.  Entries look like
# "file.xml:line".
sub create_existskeyfile {
    my ($key) = @_;
    my @hits;
    foreach my $path (glob "$wmldir/*.xml") {
        (my $name = $path) =~ s{\A.*/}{};
        my $fh;
        unless (open($fh, '<', $path)) {
            warn "$path: $!\n";
            next;
        }
        while (my $line = <$fh>) {
            chomp $line;
            push @hits, "$name:$line" if $line =~ /\Q$key\E/i;
        }
        close($fh) or warn "$path: $!\n";
    }
    $matches_for{$key} = \@hits;
    return;
}

# Drop the index built by create_existskeyfile() for $key.
sub remove_existskeyfile {
    my ($key) = @_;
    delete $matches_for{$key};
    return;
}

# Look up values in the indexes.  Takes key => value pairs and, for the
# first key with hits, returns every indexed "file.xml:line" entry that
# contains the value (literal, case-insensitive substring), joined by
# newlines and with ":<whitespace>" normalised to ":<tab>".  Returns ""
# when nothing matches.
sub exists {
    my %filters = @_;
    foreach my $key (keys %filters) {
        my $wanted = $filters{$key};
        my @hits = grep { /\Q$wanted\E/i } @{ $matches_for{$key} || [] };
        next unless @hits;
        my $out = join "\n", @hits;
        $out =~ s/:\s*/:\t/g;
        return $out;
    }
    return "";
}
--- NEW FILE: zoek-chems-zonder-cas.pl --- #! 
/usr/bin/perl -w
# Report which of the given XML files describe a chemical (and not a
# chemical group) but carry no CAS or WOC number.
use strict;
use warnings;

my $debug = "";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global counters; all of them count files.
my $nr_checked   = 0;
my $nr_chemicals = 0;
my $nr_groups    = 0;
my $nr_noCAS     = 0;

# Loop over all files
foreach my $arg (@ARGV) {
    look_for_CAS_in_file($arg);
}

# Print statistics
print " ------------------------\n";
print "Files checked : $nr_checked\n";
if ($nr_chemicals) {
    print "Chemicals found : $nr_chemicals\n";
    print "Chemical groups found : $nr_groups\n" if $nr_groups;
    if ($nr_noCAS) {
        print "Chemicals without CAS : $nr_noCAS\n";
    }
    else {
        print "All chemicals had CAS!\n";
    }
}
else {
    print "No Chemicals found.\n";
}

# Scan $file line by line and update the global counters:
#   $nr_checked    +1 for every file that could be opened
#   $nr_chemicals  +1 if some line contains "CHEMICAL"
#   $nr_groups     +1 if some line contains CLASS="GROUP"
#   $nr_noCAS      +1 for a chemical that is not a group and has no
#                  (CAS|WOC)-NUMBER line with content between ">" and "<"
# Each file is counted once per category; the counters used to be
# incremented for every matching line.  Returns nothing.
# NOTE(review): /CHEMICAL/i also matches MIME="chemical/..." attributes;
# confirm whether CLASS="CHEMICAL" was meant.
sub look_for_CAS_in_file {
    my ($file) = @_;

    print "Processing $file\t";

    my $fh;
    unless (open($fh, '<', $file)) {
        warn "$file: $!\n";
        return;
    }

    my $chemical  = 0;
    my $group     = 0;
    my $cas_found = 0;
    while (my $line = <$fh>) {
        if ($line =~ /CHEMICAL/i) {
            next if $chemical;    # 1 hit is enough
            print "CHEMICAL! ";
            $chemical = 1;
            $nr_chemicals++;
        }
        elsif ($line =~ /CLASS="GROUP"/i) {
            next if $group;       # 1 hit is enough
            print "GROUP! ";
            $group = 1;
            $nr_groups++;
        }
        elsif ($line =~ /(CAS|WOC)-NUMBER/i) {
            # Save the type before the next match overwrites $1.
            my $type = $1;
            next unless $line =~ /\>(.+?)\</;
            print "$type number found...\n" if $debug && !$cas_found;
            $cas_found = 1;
        }
    }

    if ($chemical && !$group && !$cas_found) {
        $nr_noCAS++;
        print "No CAS number found...\n";
    }
    else {
        print "\n" unless $debug;
    }

    close($fh) or warn "$file: $!\n";
    $nr_checked++;
    return;
}
--- NEW FILE: zoek-chems-zonder-cas.pl.in --- #! 
@PATHTOPERL@ -w use strict; my $debug = ""; # Check command line options if (@ARGV == 0) { print "Usage: $0 <xml-files>$/"; exit; } # Global variables my $nr_checked = 0; my $nr_chemicals = 0; my $nr_groups = 0; my $nr_noCAS = 0; # Loop over all files foreach my $arg (@ARGV) { &look_for_CAS_in_file ($arg); } # Print statistics print " ------------------------$/"; print "Files checked : $nr_checked$/"; if ($nr_chemicals) { print "Chemicals found : $nr_chemicals$/"; print "Chemical groups found : $nr_groups$/" if ($nr_groups); if ($nr_noCAS) { print "Chemicals without CAS : $nr_noCAS$/"; } else { print "All chemicals had CAS!$/"; } } else { print "No Chemicals found.$/"; } sub look_for_CA... [truncated message content] |