From: Egon W. <eg...@us...> - 2004-01-14 13:02:04
|
Update of /cvsroot/woc/woc/data/wml
In directory sc8-pr-cvs1:/tmp/cvs-serv22668

Modified Files: .cvsignore
Added Files: splitwoclist.pl syncwoclist.pl woclist.pl woclist.sh woclist.xml
Removed Files: install_source
Log Message: Added woclist: a single file wml source

--- NEW FILE: splitwoclist.pl ---
#! /usr/bin/perl
use strict;
use diagnostics;

# Author: Geert Josten
# Helper script used by wocml.sh.
#
# Splits the single-file woclist document back into one XML file per <ITEM>.
# It undoes what woclist.pl does: the FILE attribute becomes CODE again, the
# commented-out <INSERT> element is restored, and the <molecule> lines that
# were inlined after it are dropped.
#
# Usage: splitwoclist.pl [INFILE OUTDIR]
#   INFILE  combined list to read                  (default: woclist.xml)
#   OUTDIR  prefix put in front of every file name (default: tmp/)
# OUTDIR is used as a plain string prefix, so it has to end in "/".

my $infile = "woclist.xml";
my $outdir = "tmp/";
if (@ARGV >= 2) {
    $infile = shift;
    $outdir = shift;
}

open(my $in, '<', $infile) or die "Cannot read $infile: $!\n";

my $inheader   = 1;     # true while the prolog shared by all items is collected
my $fileisopen = 0;     # true while $out refers to a writable item file
my $incml      = 0;     # true while skipping inlined <molecule> lines
my $header     = "";    # prolog that is written at the top of every item file
my $out;

while (<$in>) {
    if (/DOCTYPE woclist/i) {
        # Every item file gets the ITEM doctype instead of the list doctype.
        s#<!DOCTYPE woclist.*>#<!DOCTYPE ITEM PUBLIC \"-//www-woc.sci.kun.nl//DTD WOC Markup Language V1.0//EN\" \"http://www-woc.sci.kun.nl/wocml.dtd\">#i;
        $header .= $_;
    }
    elsif (/<woclist/i) {
        # The list root element is not copied; it only ends the prolog.
        $inheader   = 0;
        $fileisopen = 0;
    }
    elsif (/<ITEM.*?FILE=\"(.*?)\"/i) {
        my $outfile = $outdir . $1 . ".xml";

        # Rename the attribute only. An unanchored s/FILE/CODE/ would hit the
        # first "file" anywhere on the line, e.g. inside the NAME value.
        s/\bFILE=/CODE=/i;

        if ($fileisopen) {
            close($out) or warn "Error while closing previous item file: $!\n";
        }

        # A new item can never start inside inlined CML. Without this reset an
        # insert that produced no </molecule> line would suppress all output
        # of the following items.
        $incml = 0;

        if (open($out, '>', $outfile)) {
            $fileisopen = 1;
            print {$out} $header;
            print {$out} $_;
        }
        else {
            warn "Cannot write to file $outfile!\n";
            $fileisopen = 0;
        }
        $inheader = 0;
    }
    elsif (/<\/woclist/i) {
        # The closing list tag is dropped.
    }
    elsif (/(.*?)<!-- \[(INSERT.*?)\](.*?)\[(\/INSERT)\] -->(.*)/i) {
        # Turn "<!-- [INSERT ...]file[/INSERT] -->" back into a real element
        # and skip everything up to the matching </molecule>.
        print {$out} "$1<$2>$3<$4>$5\n" if $fileisopen;
        $incml = 1;
    }
    elsif (/<\/molecule/i) {
        $incml = 0;
    }
    elsif ($inheader) {
        $header .= $_;
    }
    else {
        print {$out} $_ if $fileisopen && !$incml;
    }
}

if ($fileisopen) {
    close($out) or warn "Error while closing last item file: $!\n";
}
close($in);

# Same conversion as in Convert.java.
# Takes one parameter, a name, and returns its anchor form: surrounding
# whitespace removed, spaces replaced by underscores, the characters
# , | . < > removed, and A-Z lowercased.
sub NAMELikeToANCHORLike {
    my $param = shift @_;
    $param =~ s/^\s*(.*?)\s*$/$1/ig;
    $param =~ s/ /_/g;
    $param =~ s/[,|.<>]//g;
    $param =~ tr/A-Z/a-z/;
    return $param;
}

--- NEW FILE: syncwoclist.pl ---
#! /usr/bin/perl
use strict;
use diagnostics;
use File::Compare;

# Author: Geert Josten
# Helper script used by wocml.sh.
#
# Checks every *.xml file in NEWDIR against the file of the same name in
# ORGDIR/<first character>/ (names starting with a digit live in "123/") and
# warns when the original is missing, empty or different.
#
# Usage: syncwoclist.pl [NEWDIR ORGDIR]
#   NEWDIR  directory with the freshly split files (default: tmp/)
#   ORGDIR  root of the original wml tree
#           (default: ../../../../share/woc/data/wml/)
# Both are used as plain string prefixes, so they have to end in "/".

my $newdir = "tmp/";
my $orgdir = "../../../../share/woc/data/wml/";
if (@ARGV >= 2) {
    $newdir = shift;
    $orgdir = shift;
}

my @newfiles = glob($newdir . "*.xml");

foreach my $newfile (@newfiles) {
    my $orgfile = $newfile;

    # \Q...\E: the directory is a literal prefix, not a pattern.
    $orgfile =~ s/^\Q$newdir\E//;

    my $subdir = substr($orgfile, 0, 1) . "/";
    $subdir =~ s/[0-9]/123/;
    $orgfile = $orgdir . $subdir . $orgfile;

    if (-s $orgfile) {
        # File::Compare avoids passing file names through a shell.
        # It returns 0 for equal files, 1 for different files, -1 on error.
        my $status = compare($newfile, $orgfile);
        if ($status < 0) {
            warn "Cannot compare $newfile with $orgfile: $!\n";
        }
        elsif ($status) {
            warn "Original file different from $newfile!\n";
        }
    }
    else {
        warn "Original file $orgfile of $newfile not found!\n";
    }
}

--- NEW FILE: woclist.pl ---
#! /usr/bin/perl
use strict;
use diagnostics;

# Author: Geert Josten
# Helper script used by wocml.sh.
#
# Turns the concatenated item list into the final woclist document:
#   * the CODE attribute of every ITEM is renamed to FILE,
#   * every PhysicalProperties <INSERT> is commented out and followed by the
#     <molecule> ... </molecule> lines of the CML file it names, with an extra
#     StoichiometryID line after every Stoichiometry line,
#   * XML declarations after the first line are removed.
#
# Usage: woclist.pl [INFILE OUTFILE]
#   INFILE   (default: woclist.tmp2.xml)
#   OUTFILE  (default: woclist.xml)

#my $cmldir="../../data/dadml/physicalprop/cml";
my $cmldir="../../../../share/woc/data/dadml/physicalprop/cml";

my $infile  = "woclist.tmp2.xml";
my $outfile = "woclist.xml";
if (@ARGV >= 2) {
    $infile  = shift;
    $outfile = shift;
}

open(my $in,  '<', $infile)  or die "Cannot read $infile: $!\n";
open(my $out, '>', $outfile) or die "Cannot write to file $outfile: $!\n";

# Print the first line right away, so that the declaration on it is not
# stripped by the loop below.
my $first = <$in>;
print {$out} $first if defined $first;

while (<$in>) {
    if (/ITEM.*?CODE=/i) {
        # Rename the attribute only. An unanchored s/CODE/FILE/ would hit the
        # first "code" anywhere on the line, e.g. inside the NAME value.
        s/\bCODE=/FILE=/i;
        print {$out} $_;
    }
    elsif (/INSERT.*PhysicalProperties.*>(.*)</i) {
        my @inserts = molecule_lines("$cmldir/$1");

        s/<(INSERT.*?)>(.*?)<\/INSERT>/<!-- \[$1\]$2\[\/INSERT\] -->/i;
        print {$out} $_;

        foreach my $insert (@inserts) {
            if ($insert =~ /^(.*Stoichiometry.*>)(.*)(<.*)$/i) {
                my ($pre, $formula, $post) = ($1, $2, $3);

                # Display form: every number becomes a subscript.
                (my $bruto = $formula) =~ s/(\d+)/<sub>$1<\/sub>/g;
                print {$out} $pre, $bruto, $post, "\n";

                # ID form: every number is zero-padded to four digits.
                $pre =~ s/Stoichiometry/StoichiometryID/i;
                (my $brutoID = $formula) =~ s/(\d+)/sprintf("%04i", $1)/ge;
                print {$out} $pre, $brutoID, $post, "\n";
            }
            else {
                print {$out} $insert;
            }
        }
    }
    else {
        # Remove an embedded XML declaration, if any; other lines pass through.
        s/\<\?xml.*\?\>//i;
        print {$out} $_;
    }
}

close($in);
close($out) or die "Error while writing $outfile: $!\n";

# Returns the lines of the file $path from each line matching <molecule up to
# and including the next line matching </molecule, each prefixed with two
# spaces. Returns the empty list (after a warning) when the file cannot be
# read. Native replacement for:
#   awk '/<molecule/,/<\/molecule/{print " ",$0}' FILE
sub molecule_lines {
    my ($path) = @_;

    my $cml;
    unless (open($cml, '<', $path)) {
        warn "Cannot read $path: $!\n";
        return;
    }

    my @lines;
    my $inside = 0;
    while (my $line = <$cml>) {
        $inside = 1 if !$inside && $line =~ /<molecule/;
        next unless $inside;

        $line .= "\n" unless $line =~ /\n\z/;   # awk always ends a record
        push @lines, "  " . $line;

        # Checked on the opening line as well, like an awk range.
        $inside = 0 if $line =~ /<\/molecule/;
    }
    close($cml);
    return @lines;
}

# Same conversion as in Convert.java.
# Takes one parameter, a name, and returns its anchor form: surrounding
# whitespace removed, spaces replaced by underscores, the characters
# , | . < > removed, and A-Z lowercased.
sub NAMELikeToANCHORLike {
    my $param = shift @_;
    $param =~ s/^\s*(.*?)\s*$/$1/ig;
    $param =~ s/ /_/g;
    $param =~ s/[,|.<>]//g;
    $param =~ tr/A-Z/a-z/;
    return $param;
}

--- NEW FILE: woclist.sh ---
#! /usr/bin/sh -e
# Author: Geert Josten
#
# Builds $NAME$EXT: concatenates all item files below $WMLDIR (except
# anything with "templates" in its path), wraps the <ITEM> elements in a
# single root element and lets $NAME.pl post-process the result.

NAME="woclist"
EXT=".xml"
TEMP=".tmp"
TEMP2=".tmp2"
#WMLDIR="../../data/wml"
WMLDIR="../../../../share/woc/data/wml"
# cmldir is defined in the Perl part

echo "" > "$NAME$TEMP"

# One sub-directory per initial; "123" holds the names starting with a digit.
for dir in 123 a b c d e f g h i j k l m n o p q r s t u v w x y z
do
    [ -d "$WMLDIR/$dir" ] || continue
    # -exec cat {} + never runs cat without arguments, so an empty directory
    # cannot leave cat waiting on standard input.
    find "$WMLDIR/$dir" -name "*.xml" ! -path "*templates*" -exec cat {} + >> "$NAME$TEMP"
done

echo '<?xml version="1.0" encoding="ISO-8859-1"?>' > "$NAME$TEMP2"
echo '<!DOCTYPE '$NAME' SYSTEM "../../lib/xml/dtd/'$NAME'.dtd">' >> "$NAME$TEMP2"
echo '<'$NAME' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' >> "$NAME$TEMP2"
echo ' xmlns:import="http://www.sci.kun.nl/woc/import"' >> "$NAME$TEMP2"
echo ' xmlns:xlink="http://www.sci.kun.nl/woc/topicmap">' >> "$NAME$TEMP2"

# Keep only the <ITEM> ... </ITEM> ranges; this drops the per-file prologs.
awk '/<ITEM/,/<\/ITEM/{print}' "$NAME$TEMP" >> "$NAME$TEMP2"

echo '</'$NAME'>' >> "$NAME$TEMP2"

/usr/bin/perl "$NAME.pl" "$NAME$TEMP2" "$NAME$EXT"

rm "$NAME$TEMP" "$NAME$TEMP2"

# NOTE(review): line breaks were lost in the archived copy of this script;
# chgrp is assumed to be commented out and chmod to be active - confirm.
# chgrp wwwwoc $NAME$EXT
chmod 664 "$NAME$EXT"

--- NEW FILE: woclist.xml ---
<?xml version="1.0" encoding="ISO-8859-1"?> <!DOCTYPE woclist SYSTEM "../../lib/xml/dtd/woclist.dtd"> <woclist xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:import="http://www.sci.kun.nl/woc/import" xmlns:xlink="http://www.sci.kun.nl/woc/topicmap"> <ITEM NAME="1,1,1-trichloorethaan" ID="WOC00001542" CLASS="CHEMICAL" FILE="111trichloorethaan"> <DICT> <NAME CLASS="IUPAC"> <LANG ID="EN">1,1,1-trichloorethane</LANG> <LANG ID="NL">1,1,1-trichloorethaan</LANG> </NAME> <NAME CLASS="TRIVIAL"> <LANG ID="EN">methylchloroform</LANG> </NAME> </DICT> <INDEX CLASS="CAS-NUMBER">25323-89-1</INDEX> <GROUP>oplosmiddelen</GROUP> </ITEM> <ITEM NAME="1,11-difenylcyclopropaan" CLASS="CHEMICAL" ID="WOC00000001" FILE="11difenylcyclopropaan"> [...38534 lines suppressed...] <NAME CLASS="TRIVIAL"> <LANG ID="NL">zwitterion</LANG> <LANG ID="EN">zwitterion</LANG> <LANG ID="FR">zwitterion</LANG> </NAME> </DICT> <WOC> <COMMENT> <p>Een zwitterion heeft zowel een positieve als negatieve lading. 
Moleculen met een zure en een basische groep zijn bij een bepaalde pH zwitterion. Het ion kan gevormd worden door een interne zuur-base reaktie. Een aminozuur is een voorbeeld van een zwitterion.</p> </COMMENT> <tm:topicmap xmlns:tm="http://www.topic-maps.de/content/resources\tm.dtd" xmlns:woctm="http://www.sci.kun.nl/woc/woctm"> </tm:topicmap> </WOC> </ITEM> </woclist> Index: .cvsignore =================================================================== RCS file: /cvsroot/woc/woc/data/wml/.cvsignore,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -r1.1 -r1.2 *** .cvsignore 15 Aug 2002 15:16:24 -0000 1.1 --- .cvsignore 14 Jan 2004 13:01:56 -0000 1.2 *************** *** 1,2 **** --- 1,3 ---- Makefile Makefile.in + tmp --- install_source DELETED --- |