From: kirovs <ki...@us...> - 2005-12-05 17:40:15

Update of /cvsroot/cogs/ensupdate In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29431 Modified Files: getensembl.pl Log Message: wrong again... Index: getensembl.pl =================================================================== RCS file: /cvsroot/cogs/ensupdate/getensembl.pl,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** getensembl.pl 5 Dec 2005 17:26:40 -0000 1.2 --- getensembl.pl 5 Dec 2005 17:40:07 -0000 1.3 *************** *** 1,5 **** #!/usr/bin/perl # ! # getensembl.pl # set the following in the code below # connects to ftpsite <host> --- 1,6 ---- #!/usr/bin/perl + # ! # Developed by Stefan Kirov 2004 # set the following in the code below # connects to ftpsite <host> *************** *** 7,194 **** # puts files in the local directory <locdir> ! # # Load the Net::FTP package - # use Net::FTP; ! my $prefix=$ENV{ENS_HOME}; ! my $cprefix=$ENV{GKDB_HOME}; ! # # Load the directory parser - # use File::Listing qw(parse_dir); ! $| = 1; $host = 'ftp.ensembl.org'; $locdir = "$prefix/newdata"; ! ! # ! # open a file to collect the database versions ! # ! #if (! ! open (DATAVER, ">>$locdir/ensembl_version.dat"); ! # { print "Cannot open .dat file $locdir/ensembl_version.dat\n"; ! # exit(1); ! # } ! ! # ! # connect to ENSEMBL ftp site ! # $ftp = Net::FTP->new($host) or die "Cannot contact $host\n:$!"; ! $ftp->login('anonymous', 'sch...@or...') or die "Cannot login ($host)\n:". 
$ftp->message; print " Logged into $host\n\n"; $ftp->binary(); - - # - # for each organism retrieve 7 files - # - %orgdirs = qw (human homo_sapiens_core - mouse mus_musculus_core - rat rattus_norvegicus_core - zebrafish danio_rerio_core - fly drosophila_melanogaster_core - mosquito anopheles_gambiae_core - celegans caenorhabditis_elegans_core - ); - - %orgval = ('human','Homo sapiens', - 'mouse','Mus musculus', - 'rat','Rattus norvegicus', - 'zebrafish','Danio rerio', - 'fly', 'Drosophila melanogaster', - 'mosquito', 'Anopheles gambiae', - 'celegans', 'Caenordhabditis elegans' - ); - - @organisms = keys(%orgdirs); - - @getfiles = qw (external_db.txt.table.gz - external_synonym.txt.table.gz - gene_stable_id.txt.table.gz - identity_xref.txt.table.gz - object_xref.txt.table.gz - transcript.txt.table.gz - transcript_stable_id.txt.table.gz - translation.txt.table.gz - xref.txt.table.gz); - - - foreach $organism (@organisms) - { - # - # delete existing .gz files to prepare for replacements - # - foreach $getfile (@getfiles) - { $gzfile = $locdir."/".$organism."_".$getfile; - if (-e $gzfile) - { print "Deleting $gzfile\n"; - unlink($gzfile); - } - } - - # - # delete existing .table files to prepare for replacements - # - foreach $getfile (@getfiles) - { $tablefile = $locdir."/".$organism."_".substr($getfile,0,length($getfile) - 3); - if (-e $tablefile) - { print "Deleting $tablefile\n"; - unlink($tablefile); - } - } - - $tablefile =''; - } - - # - # delete existing .dat files to prepare for replacements - # - foreach $getfile (@getfiles) - { $datfile = $locdir."/".substr($getfile,0,length($getfile) - 13).".dat"; - if (-e $datfile) - { print "Deleting $datfile\n"; - unlink($datfile); - } - } - - $datfile = ''; - - # # change to the directory where files will be stored - # - chdir "$locdir"; - my ($done,$syndone); ! foreach $organism (@organisms) ! { ! # ! # determine remote directory for the current version ! # ! $rmtdir = "/pub/current_$organism/data/mysql"; ! 
$ftp->cwd($rmtdir) or die "Cannot change directory ($rmtdir)\n:".$ftp->message; ! @ls = $ftp->ls('-lR'); ! foreach $file (parse_dir(\@ls)){ ! my($name, $type, $size, $mtime, $mode) = @$file; ! if (index($name,"$orgdirs{$organism}") == 0) ! {$orgversion = $name; ! print (DATAVER "$orgval{$organism}|$orgversion\n"); ! } ! } ! ! $ftp->cwd("$rmtdir/$orgversion") or ! die "Cannot change directory ($rmtdir/$orgversion)\n:".$ftp->message; ! foreach $getfile (@getfiles) ! { print "Retrieving $orgversion/$getfile\n"; ! $ftp->get ("$getfile", "${organism}_$getfile" ) or ! warn "Could not get $orgversion/$getfile\n"; ! system("gunzip ${organism}_$getfile"); ! $tablefile = substr($getfile,0,length($getfile) - 3); ! $datfile = 'ens_'.substr($getfile,0,length($getfile) - 13).".dat"; ! unless ($datfile=~/identity/) { $datfile=~s/\_id//i; } ! $datfile=~s/ernal//i; ! if ($datfile eq 'ens_ext_db.dat') { ! next if ($done); #All external_db files are the same now...may change ! $done++; ! } ! # if ($datfile eq 'ens_external_synonym.dat') { ! # next if ($syndone); #All external_db files are the same now...may change ! # $syndone++; ! # } ! ! if (!open (DATFILE, ">>$locdir/$datfile")) ! { print "Cannot open .dat file $locdir/$datfile\n"; ! exit(1); ! } ! ! if (!open (TABLEFILE, "$locdir/${organism}_$tablefile")) ! { print "Cannot open .table file $locdir/${organism}_$tablefile\n"; ! exit(1); ! } ! ! while ($buf = <TABLEFILE>) ! {chomp($buf); ! $buf =~ s/\t/|/g; ! $buf =~ s/\\N/na/g; #weird stuff from ensembl??? ! $line = $orgval{$organism}.'|'.$buf."|\n"; ! print (DATFILE "$line"); ! } ! close DATFILE; ! close TABLEFILE; ! } } ! close DATAVER; ! print "Done with download\n"; ! $ftp->quit or ! die "Could not close the connection cleanly\n: $!"; exit; --- 8,143 ---- # puts files in the local directory <locdir> ! # Load the Net::FTP package use Net::FTP; ! use DBI; ! #Check for current version ! my $user=$ENV{ENS_USER}; ! my $pass=$ENV{ENS_PASS}; ! my $host=$ENV{ENS_HOST}; ! 
my $sock=$ENV{ENS_SOCK}; ! $enslogin=$user; ! my $sid='ensembl_databases'; ! my $cs = "dbi:mysql:$sid:$host;mysql_socket=$sock"; ! my $dbh= DBI->connect($cs,$user,$pass) or die "connecting: $DBI::errstr" ; ! my $currenth=$dbh->prepare("select db_name, version from ens_dbnames where current = 'yes'")||die "Couldn't prepare the select statement\n"; ! $currenth->execute||die "Couldn't execute select: ",$DBI::errstr,"\n"; ! my @current=@{$currenth->fetchall_arrayref}; ! foreach my $val (@current) { ! $current{$val->[0]}=$val->[1]; ! } ! my $prefix=$ENV{ENS_DATA}; # Load the directory parser use File::Listing qw(parse_dir); ! my $target; ! my @list=@ARGV; $host = 'ftp.ensembl.org'; + $rmtdir = '/pub'; + chdir $prefix; + #system("rmdir -fr newdata") if (!(-e 'newdata')); + system("mkdir newdata") if (!(-e 'newdata')); $locdir = "$prefix/newdata"; ! # connect to ftp site $ftp = Net::FTP->new($host) or die "Cannot contact $host\n:$!"; ! $ftp->login('anonymous', $enslogin) or die "Cannot login ($host)\n:". $ftp->message; print " Logged into $host\n\n"; + chdir ('newdata'); $ftp->binary(); # change to the directory where files will be stored ! $ftp->cwd($rmtdir) or die "Cannot change directory ($rmtdir)\n:".$ftp->message; + my @files=grep(/-/&&!/README/i&&!/Current/i&&!/\-2$/,$ftp->dir); #Hope there are not going to be any more db-2 entries in the future ! my @names; ! foreach my $row (@files) { ! my @dat=split(/\s/,$row); ! my $name=pop @dat; ! if (($name=~/-/) && (!grep(/\b$name\b/,@exclusion_list))){ ! push @names,$name; ! } ! } ! #Get only the most recent version ! my %dbs; ! foreach my $name (@names) { ! my @dat=split(/-/,$name); ! my $version=pop @dat; ! my $dbname=join("-",@dat); ! if ($dbs{$dbname} <$version) { ! $dbs{$dbname}=$version; } + } + my @dbs; + foreach my $key (keys %dbs) { + push @dbs, $key."-".$dbs{$key}; + } ! 
print "Retrieving files\n"; + my @cores; + #This is only for the core DB- get + foreach my $db (@dbs) { + $ftp->cwd("$db/data/mysql/"); + my @all=$ftp->dir; + foreach my $d (@all) { + my $req; #Check if this DB is requested + foreach my $target (@list) { + if ($d=~/$target/) { + $req++; + last; + } + } + next unless ($req); + my @long=split(/\s/,$d); + my $core=pop @long; + #Check if newer + my @dat=split(/_/,$core); + my $v1=pop @dat; + my $v2=($#dat<2)? 1:pop @dat; + my $version=$v2."_".$v1; + next unless ($current{$core}<$version); + next if (-e $core); + $ftp->cwd($core); + print "Working on $core\n"; + push @cores,$core;#Remember the DB names + mkdir($core); + chdir($core); + my @files=grep (!/total/,$ftp->dir); + foreach my $file (@files) { + my @long=split(/\s+/,$file); + my $gfile=pop @long; + next if ($gfile=~/^\./); + $dfile=$gfile; + $dfile=~s/\.gz//; + next if (-e $dfile); + print "Getting $gfile..\n"; + $ftp->get($gfile); + #Gunzip as we go + system ("gunzip $gfile &"); + } + print "Done with $core\n"; + chdir(".."); + $ftp->cwd('..'); + } + $ftp->cwd("../../.."); + + } + open (DBS, ">dbs" )||die"Cannot write the database data\n"; + foreach my $db (@cores) { + print DBS $db,"\n"; + } + close DBS; + print "Done with download\n"; + $ftp->quit or + warn "Could not close the connection cleanly\n: $!"; + exit; |