Update of /cvsroot/woc/woc/src/woc/src/download
In directory usw-pr-cvs1:/tmp/cvs-serv23171/woc/src/download

Added Files:
	Makefile.am download_pdb_from_NCI.pl download_pdb_from_NCI.pl.in
	download_xyz_from_NCI.pl download_xyz_from_NCI.pl.in
Log Message:

--- NEW FILE: Makefile.am ---
bindir=${prefix}/bin/download

bin_SCRIPTS = \
	download_pdb_from_NCI.pl \
	download_xyz_from_NCI.pl

CLEANFILES = $(bin_SCRIPTS)

--- NEW FILE: download_pdb_from_NCI.pl ---
#! /usr/bin/perl -w
#
# download_pdb_from_NCI.pl - fetch PDB files from the NCI database service.
#
# For every XML file named on the command line the CAS number is read from
# /ITEM/INDEX[@CLASS="CAS-NUMBER"].  Unless $root/<CAS>.pdb already exists,
# a query for that CAS number is sent to $urlbase and a non-empty, non-HTML
# reply is stored as $root/<CAS>.pdb.  A summary is printed at the end.

use diagnostics;
use strict;
use LWP;
use LWP::Simple;
use XML::XPath;

# Output directory; relative, so it is resolved against the working directory.
my $root    = "../../data/dadml/3d/pdb";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $nr_downloaded_PDBs  = 0;
my $nr_PDB_not_in_NCI   = 0;
my $nr_PDB_already_done = 0;
my $nr_CAS_found        = 0;    # every input file that carries a CAS number
my $nr_files            = scalar @ARGV;

# Write $content to $path and return true on success.  On failure the reason
# is reported on STDERR and any partial file is removed, so that a later run
# does not mistake it for a finished download.
sub save_content {
    my ($path, $content) = @_;

    my $fh;
    if (!open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }
    my $ok = print {$fh} $content;
    # Always close, even after a failed print; buffered write errors
    # surface here.
    $ok = close($fh) && $ok;
    if (!$ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }
    return 1;
}

# One user agent serves every request; nothing about it varies per file.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";
    my $xp  = XML::XPath->new(filename => $arg);
    my $cas = $xp->findvalue('/ITEM/INDEX[@CLASS="CAS-NUMBER"]');

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }
    $nr_CAS_found++;

    my $pdb_path = "$root/$cas.pdb";
    if (-e $pdb_path) {
        print " ...$cas.pdb already present!\n";
        $nr_PDB_already_done++;
        next;
    }

    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=pdb&sort=nsc";
    print "URL: $url\n";
    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...trying to download PDB for $cas";
    my $response = $browser->request($request);

    if (!$response->is_success) {
        print " ... unexpected error has occurred: ",
            $response->status_line, "\n";
        next;
    }

    # The service answers with an HTML page when it has no structure for
    # the query, so an empty or HTML reply means "nothing to save".
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no PDB file found\n";
        $nr_PDB_not_in_NCI++;
        next;
    }

    print " ... and saving\n";
    $nr_downloaded_PDBs++ if save_content($pdb_path, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $nr_files\n";
print " CAS number found : $nr_CAS_found\n";
print " PDB files downloaded : $nr_downloaded_PDBs\n";
print " PDB files not in NCI : $nr_PDB_not_in_NCI\n";
print " PDB already done : $nr_PDB_already_done\n";

--- NEW FILE: download_pdb_from_NCI.pl.in ---
#! @PATHTOPERL@ -w
#
# download_pdb_from_NCI.pl - fetch PDB files from the NCI database service.
#
# For every XML file named on the command line the CAS number is read from
# /ITEM/INDEX[@CLASS="CAS-NUMBER"].  Unless $root/<CAS>.pdb already exists,
# a query for that CAS number is sent to $urlbase and a non-empty, non-HTML
# reply is stored as $root/<CAS>.pdb.  A summary is printed at the end.

use diagnostics;
use strict;
use LWP;
use LWP::Simple;
use XML::XPath;

# Output directory; relative, so it is resolved against the working directory.
my $root    = "../../data/dadml/3d/pdb";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $nr_downloaded_PDBs  = 0;
my $nr_PDB_not_in_NCI   = 0;
my $nr_PDB_already_done = 0;
my $nr_CAS_found        = 0;    # every input file that carries a CAS number
my $nr_files            = scalar @ARGV;

# Write $content to $path and return true on success.  On failure the reason
# is reported on STDERR and any partial file is removed, so that a later run
# does not mistake it for a finished download.
sub save_content {
    my ($path, $content) = @_;

    my $fh;
    if (!open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }
    my $ok = print {$fh} $content;
    # Always close, even after a failed print; buffered write errors
    # surface here.
    $ok = close($fh) && $ok;
    if (!$ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }
    return 1;
}

# One user agent serves every request; nothing about it varies per file.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";
    my $xp  = XML::XPath->new(filename => $arg);
    my $cas = $xp->findvalue('/ITEM/INDEX[@CLASS="CAS-NUMBER"]');

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }
    $nr_CAS_found++;

    my $pdb_path = "$root/$cas.pdb";
    if (-e $pdb_path) {
        print " ...$cas.pdb already present!\n";
        $nr_PDB_already_done++;
        next;
    }

    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=pdb&sort=nsc";
    print "URL: $url\n";
    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...trying to download PDB for $cas";
    my $response = $browser->request($request);

    if (!$response->is_success) {
        print " ... unexpected error has occurred: ",
            $response->status_line, "\n";
        next;
    }

    # The service answers with an HTML page when it has no structure for
    # the query, so an empty or HTML reply means "nothing to save".
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no PDB file found\n";
        $nr_PDB_not_in_NCI++;
        next;
    }

    print " ... and saving\n";
    $nr_downloaded_PDBs++ if save_content($pdb_path, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $nr_files\n";
print " CAS number found : $nr_CAS_found\n";
print " PDB files downloaded : $nr_downloaded_PDBs\n";
print " PDB files not in NCI : $nr_PDB_not_in_NCI\n";
print " PDB already done : $nr_PDB_already_done\n";

--- NEW FILE: download_xyz_from_NCI.pl ---
#! /usr/bin/perl -w
#
# download_xyz_from_NCI.pl - fetch XYZ files from the NCI database service.
#
# Each file named on the command line is scanned line by line for a
# CAS-NUMBER entry.  Unless $root/<CAS>.xyz already exists, a query for that
# CAS number is sent to $urlbase and a non-empty, non-HTML reply is stored
# as $root/<CAS>.xyz.  A summary is printed at the end.

use diagnostics;
use strict;
use LWP;
use LWP::Simple;

# Output directory; relative, so it is resolved against the working directory.
my $root    = "../../data/dadml/3d/xyz";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $no_downloaded_XYZs  = 0;
my $no_XYZ_not_in_NCI   = 0;
my $no_XYZ_already_done = 0;
my $no_CAS_found        = 0;    # every input file that carries a CAS number
my $no_files            = scalar @ARGV;

# Write $content to $path and return true on success.  On failure the reason
# is reported on STDERR and any partial file is removed, so that a later run
# does not mistake it for a finished download.
sub save_content {
    my ($path, $content) = @_;

    my $fh;
    if (!open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }
    my $ok = print {$fh} $content;
    # Always close, even after a failed print; buffered write errors
    # surface here.
    $ok = close($fh) && $ok;
    if (!$ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }
    return 1;
}

# One user agent serves every request; nothing about it varies per file.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";

    my $xml_fh;
    if (!open($xml_fh, '<', $arg)) {
        warn " ... cannot open $arg: $!\n";
        next;
    }

    # Of all lines that mention CAS-NUMBER and contain a >...< span,
    # the last one determines the CAS number.
    my $cas = "";
    while (my $line = <$xml_fh>) {
        if ($line =~ /CAS-NUMBER/i && $line =~ /\>(.+?)\</) {
            $cas = $1;
        }
    }
    close($xml_fh);

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }
    $no_CAS_found++;

    my $xyz_path = "$root/$cas.xyz";
    if (-e $xyz_path) {
        print " ...$cas.xyz already downloaded!\n";
        $no_XYZ_already_done++;
        next;
    }

    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=xyz&sort=nsc";
    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...downloading XYZ for $cas";
    my $response = $browser->request($request);

    if (!$response->is_success) {
        # Terminates the progress line and reports the failure instead of
        # silently moving on to the next file.
        print " ... unexpected error has occurred: ",
            $response->status_line, "\n";
        next;
    }

    # The service answers with an HTML page when it has no structure for
    # the query, so an empty or HTML reply means "nothing to save".
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no XYZ file found\n";
        $no_XYZ_not_in_NCI++;
        next;
    }

    print " ... and saving\n";
    $no_downloaded_XYZs++ if save_content($xyz_path, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $no_files\n";
print " CAS number found : $no_CAS_found\n";
print " XYZ files downloaded : $no_downloaded_XYZs\n";
print " XYZ files not in NCI : $no_XYZ_not_in_NCI\n";
print " XYZ already done : $no_XYZ_already_done\n";

--- NEW FILE: download_xyz_from_NCI.pl.in ---
#! @PATHTOPERL@ -w
#
# download_xyz_from_NCI.pl - fetch XYZ files from the NCI database service.
#
# Each file named on the command line is scanned line by line for a
# CAS-NUMBER entry.  Unless $root/<CAS>.xyz already exists, a query for that
# CAS number is sent to $urlbase and a non-empty, non-HTML reply is stored
# as $root/<CAS>.xyz.  A summary is printed at the end.

use diagnostics;
use strict;
use LWP;
use LWP::Simple;

# Output directory; relative, so it is resolved against the working directory.
my $root    = "../../data/dadml/3d/xyz";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Global variables
my $no_downloaded_XYZs  = 0;
my $no_XYZ_not_in_NCI   = 0;
my $no_XYZ_already_done = 0;
my $no_CAS_found        = 0;    # every input file that carries a CAS number
my $no_files            = scalar @ARGV;

# Write $content to $path and return true on success.  On failure the reason
# is reported on STDERR and any partial file is removed, so that a later run
# does not mistake it for a finished download.
sub save_content {
    my ($path, $content) = @_;

    my $fh;
    if (!open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }
    my $ok = print {$fh} $content;
    # Always close, even after a failed print; buffered write errors
    # surface here.
    $ok = close($fh) && $ok;
    if (!$ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }
    return 1;
}

# One user agent serves every request; nothing about it varies per file.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";

    my $xml_fh;
    if (!open($xml_fh, '<', $arg)) {
        warn " ... cannot open $arg: $!\n";
        next;
    }

    # Of all lines that mention CAS-NUMBER and contain a >...< span,
    # the last one determines the CAS number.
    my $cas = "";
    while (my $line = <$xml_fh>) {
        if ($line =~ /CAS-NUMBER/i && $line =~ /\>(.+?)\</) {
            $cas = $1;
        }
    }
    close($xml_fh);

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }
    $no_CAS_found++;

    my $xyz_path = "$root/$cas.xyz";
    if (-e $xyz_path) {
        print " ...$cas.xyz already downloaded!\n";
        $no_XYZ_already_done++;
        next;
    }

    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=xyz&sort=nsc";
    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...downloading XYZ for $cas";
    my $response = $browser->request($request);

    if (!$response->is_success) {
        # Terminates the progress line and reports the failure instead of
        # silently moving on to the next file.
        print " ... unexpected error has occurred: ",
            $response->status_line, "\n";
        next;
    }

    # The service answers with an HTML page when it has no structure for
    # the query, so an empty or HTML reply means "nothing to save".
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no XYZ file found\n";
        $no_XYZ_not_in_NCI++;
        next;
    }

    print " ... and saving\n";
    $no_downloaded_XYZs++ if save_content($xyz_path, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $no_files\n";
print " CAS number found : $no_CAS_found\n";
print " XYZ files downloaded : $no_downloaded_XYZs\n";
print " XYZ files not in NCI : $no_XYZ_not_in_NCI\n";
print " XYZ already done : $no_XYZ_already_done\n";