From: kirovs <ki...@us...> - 2005-12-05 17:26:48
|
Update of /cvsroot/cogs/ensupdate In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27022 Modified Files: getensembl.pl Log Message: wrong ver Index: getensembl.pl =================================================================== RCS file: /cvsroot/cogs/ensupdate/getensembl.pl,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** getensembl.pl 2 Dec 2005 17:04:01 -0000 1.1.1.1 --- getensembl.pl 5 Dec 2005 17:26:40 -0000 1.2 *************** *** 1,6 **** #!/usr/bin/perl - # ! # Developed by Stefan Kirov 2004 # set the following in the code below # connects to ftpsite <host> --- 1,5 ---- #!/usr/bin/perl # ! # getensembl.pl # set the following in the code below # connects to ftpsite <host> *************** *** 8,143 **** # puts files in the local directory <locdir> ! # Load the Net::FTP package use Net::FTP; ! use DBI; ! #Check for current version ! my $user=$ENV{ENS_USER}; ! my $pass=$ENV{ENS_PASS}; ! my $host=$ENV{ENS_HOST}; ! my $sock=$ENV{ENS_SOCK}; ! $enslogin=$user; ! my $sid='ensembl_databases'; ! my $cs = "dbi:mysql:$sid:$host;mysql_socket=$sock"; ! my $dbh= DBI->connect($cs,$user,$pass) or die "connecting: $DBI::errstr" ; ! my $currenth=$dbh->prepare("select db_name, version from ens_dbnames where current = 'yes'")||die "Couldn't prepare the select statement\n"; ! $currenth->execute||die "Couldn't execute select: ",$DBI::errstr,"\n"; ! my @current=@{$currenth->fetchall_arrayref}; ! foreach my $val (@current) { ! $current{$val->[0]}=$val->[1]; ! } ! my $prefix=$ENV{ENS_DATA}; # Load the directory parser use File::Listing qw(parse_dir); ! my $target; ! my @list=@ARGV; $host = 'ftp.ensembl.org'; - $rmtdir = '/pub'; - chdir $prefix; - #system("rmdir -fr newdata") if (!(-e 'newdata')); - system("mkdir newdata") if (!(-e 'newdata')); $locdir = "$prefix/newdata"; ! # connect to ftp site $ftp = Net::FTP->new($host) or die "Cannot contact $host\n:$!"; ! $ftp->login('anonymous', $enslogin) or die "Cannot login ($host)\n:". $ftp->message; print " Logged into $host\n\n"; - chdir ('newdata'); $ftp->binary(); - # change to the directory where files will be stored ! $ftp->cwd($rmtdir) or ! die "Cannot change directory ($rmtdir)\n:".$ftp->message; ! my @files=grep(/-/&&!/README/i&&!/Current/i&&!/\-2$/,$ftp->dir); #Hope there are not going to be any more db-2 entries in the future ! my @names; ! foreach my $row (@files) { ! my @dat=split(/\s/,$row); ! my $name=pop @dat; ! if (($name=~/-/) && (!grep(/\b$name\b/,@exclusion_list))){ ! push @names,$name; ! } ! } ! #Get only the most recent version ! my %dbs; ! foreach my $name (@names) { ! my @dat=split(/-/,$name); ! my $version=pop @dat; ! my $dbname=join("-",@dat); ! if ($dbs{$dbname} <$version) { ! $dbs{$dbname}=$version; ! } ! } ! my @dbs; ! foreach my $key (keys %dbs) { ! push @dbs, $key."-".$dbs{$key}; ! } - print "Retrieving files\n"; ! my @cores; ! #This is only for the core DB- get ! foreach my $db (@dbs) { ! $ftp->cwd("$db/data/mysql/"); ! my @all=$ftp->dir; ! foreach my $d (@all) { ! my $req; #Check if this DB is requested ! foreach my $target (@list) { ! if ($d=~/$target/) { ! $req++; ! last; ! } } - next unless ($req); ! my @long=split(/\s/,$d); ! my $core=pop @long; ! #Check if newer ! my @dat=split(/_/,$core); ! my $v1=pop @dat; ! my $v2=($#dat<2)? 1:pop @dat; ! my $version=$v2."_".$v1; ! next unless ($current{$core}<$version); ! next if (-e $core); ! $ftp->cwd($core); ! print "Working on $core\n"; ! push @cores,$core;#Remember the DB names ! mkdir($core); ! chdir($core); ! my @files=grep (!/total/,$ftp->dir); ! foreach my $file (@files) { ! my @long=split(/\s+/,$file); ! my $gfile=pop @long; ! next if ($gfile=~/^\./); ! $dfile=$gfile; ! $dfile=~s/\.gz//; ! next if (-e $dfile); ! print "Getting $gfile..\n"; ! $ftp->get($gfile); ! #Gunzip as we go ! system ("gunzip $gfile &"); ! } ! print "Done with $core\n"; ! chdir(".."); ! $ftp->cwd('..'); ! } ! $ftp->cwd("../../.."); ! } ! open (DBS, ">dbs" )||die"Cannot write the database data\n"; ! foreach my $db (@cores) { ! print DBS $db,"\n"; ! } ! close DBS; ! print "Done with download\n"; ! $ftp->quit or ! warn "Could not close the connection cleanly\n: $!"; exit; --- 7,194 ---- # puts files in the local directory <locdir> ! # # Load the Net::FTP package + # use Net::FTP; ! my $prefix=$ENV{ENS_HOME}; ! my $cprefix=$ENV{GKDB_HOME}; ! # # Load the directory parser + # use File::Listing qw(parse_dir); ! $| = 1; $host = 'ftp.ensembl.org'; $locdir = "$prefix/newdata"; ! ! # ! # open a file to collect the database versions ! # ! #if (! ! open (DATAVER, ">>$locdir/ensembl_version.dat"); ! # { print "Cannot open .dat file $locdir/ensembl_version.dat\n"; ! # exit(1); ! # } ! ! # ! # connect to ENSEMBL ftp site ! # $ftp = Net::FTP->new($host) or die "Cannot contact $host\n:$!"; ! $ftp->login('anonymous', 'sch...@or...') or die "Cannot login ($host)\n:". $ftp->message; print " Logged into $host\n\n"; $ftp->binary(); + # + # for each organism retrieve 7 files + # + %orgdirs = qw (human homo_sapiens_core + mouse mus_musculus_core + rat rattus_norvegicus_core + zebrafish danio_rerio_core + fly drosophila_melanogaster_core + mosquito anopheles_gambiae_core + celegans caenorhabditis_elegans_core + ); ! %orgval = ('human','Homo sapiens', ! 'mouse','Mus musculus', ! 'rat','Rattus norvegicus', ! 'zebrafish','Danio rerio', ! 'fly', 'Drosophila melanogaster', ! 'mosquito', 'Anopheles gambiae', ! 'celegans', 'Caenordhabditis elegans' ! ); ! @organisms = keys(%orgdirs); ! @getfiles = qw (external_db.txt.table.gz ! external_synonym.txt.table.gz ! gene_stable_id.txt.table.gz ! identity_xref.txt.table.gz ! object_xref.txt.table.gz ! transcript.txt.table.gz ! transcript_stable_id.txt.table.gz ! translation.txt.table.gz ! xref.txt.table.gz); ! foreach $organism (@organisms) ! { ! # ! # delete existing .gz files to prepare for replacements ! # ! foreach $getfile (@getfiles) ! { $gzfile = $locdir."/".$organism."_".$getfile; ! if (-e $gzfile) ! { print "Deleting $gzfile\n"; ! unlink($gzfile); ! } ! } ! # ! # delete existing .table files to prepare for replacements ! # ! foreach $getfile (@getfiles) ! { $tablefile = $locdir."/".$organism."_".substr($getfile,0,length($getfile) - 3); ! if (-e $tablefile) ! { print "Deleting $tablefile\n"; ! unlink($tablefile); ! } ! } ! ! $tablefile =''; } ! # ! # delete existing .dat files to prepare for replacements ! # ! foreach $getfile (@getfiles) ! { $datfile = $locdir."/".substr($getfile,0,length($getfile) - 13).".dat"; ! if (-e $datfile) ! { print "Deleting $datfile\n"; ! unlink($datfile); ! } ! } ! $datfile = ''; ! ! # ! # change to the directory where files will be stored ! # ! chdir "$locdir"; ! ! my ($done,$syndone); ! ! foreach $organism (@organisms) ! { ! # ! # determine remote directory for the current version ! # ! $rmtdir = "/pub/current_$organism/data/mysql"; ! $ftp->cwd($rmtdir) or ! die "Cannot change directory ($rmtdir)\n:".$ftp->message; ! ! @ls = $ftp->ls('-lR'); ! foreach $file (parse_dir(\@ls)){ ! my($name, $type, $size, $mtime, $mode) = @$file; ! if (index($name,"$orgdirs{$organism}") == 0) ! {$orgversion = $name; ! print (DATAVER "$orgval{$organism}|$orgversion\n"); ! } ! } ! ! $ftp->cwd("$rmtdir/$orgversion") or ! die "Cannot change directory ($rmtdir/$orgversion)\n:".$ftp->message; ! foreach $getfile (@getfiles) ! { print "Retrieving $orgversion/$getfile\n"; ! $ftp->get ("$getfile", "${organism}_$getfile" ) or ! warn "Could not get $orgversion/$getfile\n"; ! system("gunzip ${organism}_$getfile"); ! $tablefile = substr($getfile,0,length($getfile) - 3); ! $datfile = 'ens_'.substr($getfile,0,length($getfile) - 13).".dat"; ! unless ($datfile=~/identity/) { $datfile=~s/\_id//i; } ! $datfile=~s/ernal//i; ! if ($datfile eq 'ens_ext_db.dat') { ! next if ($done); #All external_db files are the same now...may change ! $done++; ! } ! # if ($datfile eq 'ens_external_synonym.dat') { ! # next if ($syndone); #All external_db files are the same now...may change ! # $syndone++; ! # } + if (!open (DATFILE, ">>$locdir/$datfile")) + { print "Cannot open .dat file $locdir/$datfile\n"; + exit(1); + } + + if (!open (TABLEFILE, "$locdir/${organism}_$tablefile")) + { print "Cannot open .table file $locdir/${organism}_$tablefile\n"; + exit(1); + } + + while ($buf = <TABLEFILE>) + {chomp($buf); + $buf =~ s/\t/|/g; + $buf =~ s/\\N/na/g; #weird stuff from ensembl??? + $line = $orgval{$organism}.'|'.$buf."|\n"; + print (DATFILE "$line"); + } + close DATFILE; + close TABLEFILE; + + } + } + + close DATAVER; + print "Done with download\n"; + $ftp->quit or + die "Could not close the connection cleanly\n: $!"; + + + exit; |