From: Egon W. <eg...@us...> - 2002-07-20 14:50:37
|
Update of /cvsroot/woc/woc/bin/download In directory usw-pr-cvs1:/tmp/cvs-serv28633/download Added Files: Makefile.am Makefile.in download_pdb_from_NCI.pl.in download_xyz_from_NCI.pl.in Log Message: Added scripts for autodownloading of 3D coords files from the NCI database. --- NEW FILE: Makefile.am --- bin_SCRIPTS = \ download_pdb_from_NCI.pl \ download_xyz_from_NCI.pl CLEANFILES = $(bin_SCRIPTS) --- NEW FILE: Makefile.in --- # Makefile.in generated automatically by automake 1.5 from Makefile.am. # Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 # Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ SHELL = @SHELL@ srcdir = @srcdir@ top_srcdir = @top_srcdir@ VPATH = @srcdir@ prefix = @prefix@ exec_prefix = @exec_prefix@ bindir = @bindir@ sbindir = @sbindir@ libexecdir = @libexecdir@ datadir = @datadir@ sysconfdir = @sysconfdir@ sharedstatedir = @sharedstatedir@ localstatedir = @localstatedir@ libdir = @libdir@ infodir = @infodir@ mandir = @mandir@ includedir = @includedir@ oldincludedir = /usr/include pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ top_builddir = ../.. 
ACLOCAL = @ACLOCAL@ AUTOCONF = @AUTOCONF@ AUTOMAKE = @AUTOMAKE@ AUTOHEADER = @AUTOHEADER@ INSTALL = @INSTALL@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_HEADER = $(INSTALL_DATA) transform = @program_transform_name@ NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : AMTAR = @AMTAR@ AWK = @AWK@ DEPDIR = @DEPDIR@ EXEEXT = @EXEEXT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ OBJEXT = @OBJEXT@ PACKAGE = @PACKAGE@ PATHTOPERL = @PATHTOPERL@ PATHTOSH = @PATHTOSH@ PATHTOXSLTPROC = @PATHTOXSLTPROC@ VERSION = @VERSION@ am__include = @am__include@ am__quote = @am__quote@ install_sh = @install_sh@ bin_SCRIPTS = \ download_pdb_from_NCI.pl \ download_xyz_from_NCI.pl CLEANFILES = $(bin_SCRIPTS) subdir = bin/download mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs CONFIG_CLEAN_FILES = download_pdb_from_NCI.pl download_xyz_from_NCI.pl SCRIPTS = $(bin_SCRIPTS) DIST_SOURCES = DIST_COMMON = Makefile.am Makefile.in download_pdb_from_NCI.pl.in \ download_xyz_from_NCI.pl.in all: all-am .SUFFIXES: $(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) cd $(top_srcdir) && \ $(AUTOMAKE) --gnu bin/download/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status cd $(top_builddir) && \ CONFIG_HEADERS= CONFIG_LINKS= \ CONFIG_FILES=$(subdir)/$@ $(SHELL) ./config.status download_pdb_from_NCI.pl: $(top_builddir)/config.status download_pdb_from_NCI.pl.in cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= CONFIG_LINKS= $(SHELL) ./config.status download_xyz_from_NCI.pl: $(top_builddir)/config.status download_xyz_from_NCI.pl.in cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= CONFIG_LINKS= $(SHELL) ./config.status install-binSCRIPTS: $(bin_SCRIPTS) @$(NORMAL_INSTALL) $(mkinstalldirs) $(DESTDIR)$(bindir) @list='$(bin_SCRIPTS)'; for p in $$list; do \ f="`echo $$p|sed '$(transform)'`"; \ if test -f $$p; then \ 
echo " $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/$$f"; \ $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/$$f; \ elif test -f $(srcdir)/$$p; then \ echo " $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/$$f"; \ $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/$$f; \ else :; fi; \ done uninstall-binSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(bin_SCRIPTS)'; for p in $$list; do \ f="`echo $$p|sed '$(transform)'`"; \ echo " rm -f $(DESTDIR)$(bindir)/$$f"; \ rm -f $(DESTDIR)$(bindir)/$$f; \ done uninstall-info-am: tags: TAGS TAGS: DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) top_distdir = ../.. distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) distdir: $(DISTFILES) @for file in $(DISTFILES); do \ if test -f $$file; then d=.; else d=$(srcdir); fi; \ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ if test "$$dir" != "$$file" && test "$$dir" != "."; then \ $(mkinstalldirs) "$(distdir)/$$dir"; \ fi; \ if test -d $$d/$$file; then \ cp -pR $$d/$$file $(distdir) \ || exit 1; \ else \ test -f $(distdir)/$$file \ || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(SCRIPTS) installdirs: $(mkinstalldirs) $(DESTDIR)$(bindir) install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -rm -f Makefile $(CONFIG_CLEAN_FILES) stamp-h stamp-h[0-9]* maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic mostlyclean-am distclean: distclean-am distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: info: info-am info-am: install-data-am: install-exec-am: install-binSCRIPTS install-info: install-info-am install-man: installcheck-am: maintainer-clean: maintainer-clean-am maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic uninstall-am: uninstall-binSCRIPTS uninstall-info-am .PHONY: all all-am check check-am clean clean-generic distclean \ distclean-generic distdir dvi dvi-am info info-am install \ install-am install-binSCRIPTS install-data install-data-am \ install-exec install-exec-am install-info install-info-am \ install-man install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic uninstall uninstall-am \ uninstall-binSCRIPTS uninstall-info-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: --- NEW FILE: download_pdb_from_NCI.pl.in --- #! 
@PATHTOPERL@ -w
#
# download_pdb_from_NCI.pl - download PDB 3D coordinate files from the NCI
# database for a set of XML files.
#
# Usage: download_pdb_from_NCI.pl <xml-files>
#
# For every XML file given on the command line the CAS number is read from
# /ITEM/INDEX[@CLASS="CAS-NUMBER"]. If "$root/<cas>.pdb" does not exist yet,
# the NCI service is queried for that CAS number and the returned content is
# saved under that name. A short statistics block is printed at the end.

use diagnostics;
use strict;

use LWP;
use LWP::Simple;
use XML::XPath;

my $root    = "../../data/dadml/3d/pdb";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Counters for the statistics printed at the end
my $nr_downloaded_PDBs  = 0;
my $nr_PDB_in_NCI       = 0;    # requests for which NO PDB file was returned
my $nr_PDB_already_done = 0;
my $nr_CAS_found        = 0;    # CAS numbers for which a download was attempted
my $nr_files            = scalar @ARGV;

# The user agent does not depend on the file being processed, so it is
# created once instead of once per download.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Write $content to $path.
# Returns 1 on success. On failure a warning is issued, any partial file is
# removed (a leftover file would make later runs report it as already
# present) and 0 is returned.
sub save_file {
    my ($path, $content) = @_;

    my $fh;
    unless (open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }

    my $ok = print {$fh} $content;
    # Buffered write errors only surface at close, so check both.
    $ok = close($fh) && $ok;
    unless ($ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }

    return 1;
}

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";

    my $xp  = XML::XPath->new(filename => $arg);
    my $cas = $xp->findvalue('/ITEM/INDEX[@CLASS="CAS-NUMBER"]');

    # Force a plain string and drop surrounding whitespace before the value
    # is used in a file name and a URL.
    $cas = "$cas";
    $cas =~ s/\A\s+//;
    $cas =~ s/\s+\z//;

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }

    # The value comes straight from the XML file and ends up in a file path
    # and a query string, so only accept the CAS registry number layout
    # (digits-digits-digit). This also rules out '/' and '&'.
    if ($cas !~ /\A[0-9]{2,7}-[0-9]{2}-[0-9]\z/) {
        print " ... malformed CAS number '$cas', skipping\n";
        next;
    }

    my $pdbfile = "$root/$cas.pdb";
    if (-e $pdbfile) {
        print " ...$cas.pdb already present!\n";
        $nr_PDB_already_done++;
        next;
    }

    $nr_CAS_found++;
    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=pdb&sort=nsc";
    print "URL: $url\n";

    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...trying to download PDB for $cas";
    my $response = $browser->request($request);

    unless ($response->is_success) {
        print " ... unexpected error has occured\n";
        next;
    }

    # An empty body, or one containing "HTML", is counted as "no PDB file";
    # presumably the service answers with an HTML page on a miss.
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no PDB file found\n";
        $nr_PDB_in_NCI++;
        next;
    }

    print " ... and saving\n";
    # Only count the download when the file really was written.
    $nr_downloaded_PDBs++ if save_file($pdbfile, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $nr_files\n";
print " CAS number found : $nr_CAS_found\n";
print " PDB files downloaded : $nr_downloaded_PDBs\n";
print " PDB files not in NCI : $nr_PDB_in_NCI\n";
print " PDB already done : $nr_PDB_already_done\n";

--- NEW FILE: download_xyz_from_NCI.pl.in --- #! 
@PATHTOPERL@ -w
#
# download_xyz_from_NCI.pl - download XYZ 3D coordinate files from the NCI
# database for a set of XML files.
#
# Usage: download_xyz_from_NCI.pl <xml-files>
#
# Every file given on the command line is scanned line by line for a line
# mentioning CAS-NUMBER; the text between '>' and '<' on that line is taken
# as the CAS number. If "$root/<cas>.xyz" does not exist yet, the NCI service
# is queried for that CAS number and the returned content is saved under that
# name. A short statistics block is printed at the end.

use diagnostics;
use strict;

use LWP;
use LWP::Simple;

my $root    = "../../data/dadml/3d/xyz";
my $urlbase = "http://131.188.127.153/cgi-bin/services/ncidb/ncidb2.tcl";

# Check command line options
if (@ARGV == 0) {
    print "Usage: $0 <xml-files>\n";
    exit;
}

# Counters for the statistics printed at the end
my $no_downloaded_XYZs  = 0;
my $no_XYZ_in_NCI       = 0;    # requests for which NO XYZ file was returned
my $no_XYZ_already_done = 0;
my $no_CAS_found        = 0;    # CAS numbers for which a download was attempted
my $no_files            = scalar @ARGV;

# The user agent does not depend on the file being processed, so it is
# created once instead of once per download.
my $browser = LWP::UserAgent->new();
$browser->agent("WOC Downloader");

# Scan $file for the CAS number.
# Returns the text between '>' and '<' of the last line that mentions
# CAS-NUMBER, "" when no such line exists, or undef (after a warning) when
# the file cannot be opened.
sub read_cas_number {
    my ($file) = @_;

    my $fh;
    unless (open($fh, '<', $file)) {
        warn "Cannot open $file: $!\n";
        return;
    }

    my $cas = "";
    while (my $line = <$fh>) {
        if ($line =~ /CAS-NUMBER/i && $line =~ />(.+?)</) {
            $cas = $1;
        }
    }
    close($fh);

    return $cas;
}

# Write $content to $path.
# Returns 1 on success. On failure a warning is issued, any partial file is
# removed (a leftover file would make later runs report it as already
# downloaded) and 0 is returned.
sub save_file {
    my ($path, $content) = @_;

    my $fh;
    unless (open($fh, '>', $path)) {
        warn "Cannot open $path for writing: $!\n";
        return 0;
    }

    my $ok = print {$fh} $content;
    # Buffered write errors only surface at close, so check both.
    $ok = close($fh) && $ok;
    unless ($ok) {
        warn "Cannot write $path: $!\n";
        unlink $path;
        return 0;
    }

    return 1;
}

# Loop over all files
foreach my $arg (@ARGV) {
    print "Checking $arg...\n";

    my $cas = read_cas_number($arg);
    next unless defined $cas;    # unreadable file, already reported

    # Drop surrounding whitespace before the value is used in a file name
    # and a URL.
    $cas =~ s/\A\s+//;
    $cas =~ s/\s+\z//;

    if ($cas eq "") {
        print " ... no CAS number\n";
        next;
    }

    # The value comes straight from the input file and ends up in a file
    # path and a query string, so only accept the CAS registry number layout
    # (digits-digits-digit). This also rules out '/' and '&'.
    if ($cas !~ /\A[0-9]{2,7}-[0-9]{2}-[0-9]\z/) {
        print " ... malformed CAS number '$cas', skipping\n";
        next;
    }

    my $xyzfile = "$root/$cas.xyz";
    if (-e $xyzfile) {
        print " ...$cas.xyz already downloaded!\n";
        $no_XYZ_already_done++;
        next;
    }

    $no_CAS_found++;
    my $url = "$urlbase?op1=cas&data1=$cas&op2=cas&data2=&op3=inclformula&data3=&op4=fse&data4=&dohighlight=1&andor=and&maxhits=100&timeout=90&output=xyz&sort=nsc";

    my $request = HTTP::Request->new(GET => $url);
    print STDERR " ...downloading XYZ for $cas";
    my $response = $browser->request($request);

    # A failed request used to pass silently, leaving the progress line
    # unterminated; report it instead.
    unless ($response->is_success) {
        print " ... download failed (" . $response->status_line . ")\n";
        next;
    }

    # An empty body, or one containing "HTML", is counted as "no XYZ file";
    # presumably the service answers with an HTML page on a miss.
    my $content = $response->content || "";
    if ($content eq "" || $content =~ /HTML/i) {
        print " ... no XYZ file found\n";
        $no_XYZ_in_NCI++;
        next;
    }

    print " ... and saving\n";
    # Only count the download when the file really was written.
    $no_downloaded_XYZs++ if save_file($xyzfile, $content);
}

# Print statistics
print " ----------------------------\n";
print "Files checked : $no_files\n";
print " CAS number found : $no_CAS_found\n";
print " XYZ files downloaded : $no_downloaded_XYZs\n";
print " XYZ files not in NCI : $no_XYZ_in_NCI\n";
print " XYZ already done : $no_XYZ_already_done\n";
|