From: Don G. <don...@us...> - 2007-10-15 16:19:29
|
Update of /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv29196/lib/Bio/GMOD/Bulkfiles Modified Files: BlastWriter.pm BulkWriter.pm FastaWriter.pm MyLargePrimarySeq.pm TableWriter.pm Log Message: no_csomesplit change for genomes with many scaffolds; validate chado variables; config updates Index: BlastWriter.pm =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles/BlastWriter.pm,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** BlastWriter.pm 28 Dec 2005 02:22:07 -0000 1.5 --- BlastWriter.pm 15 Oct 2007 16:19:29 -0000 1.6 *************** *** 169,173 **** next if( @featset && ! grep({$_ eq $featn} @featset) ); ! my $ok= ( $type =~ /$intype/ && -e $fp) ; print STDERR "openInput: name=$name $featn, type=$type, ok=$ok\n" if $DEBUG; next unless $ok; --- 169,175 ---- next if( @featset && ! grep({$_ eq $featn} @featset) ); ! my $ok= ( $type =~ /$intype/ && -s $fp) ; # -e $fp ! ## need -s $fp here : files must have data (should delete 0 fasta) ! print STDERR "openInput: name=$name $featn, type=$type, ok=$ok\n" if $DEBUG; next unless $ok; *************** *** 301,305 **** } ! my $ftime= $^T - 24*60*60*(-M $blastfile); $db->{date}= POSIX::strftime("%d-%b-%Y", localtime( $ftime )); --- 303,308 ---- } ! my $ftime= -M $blastfile || 0; ! $ftime= $^T - 24*60*60*$ftime; $db->{date}= POSIX::strftime("%d-%b-%Y", localtime( $ftime )); Index: TableWriter.pm =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles/TableWriter.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TableWriter.pm 12 Jan 2006 06:03:51 -0000 1.2 --- TableWriter.pm 15 Oct 2007 16:19:29 -0000 1.3 *************** *** 279,284 **** my( $targetid, $filesetinfo, $csomefeats )= @_; ! my $configfile= $filesetinfo->{config}; ! warn "TableWriter: target=$targetid, config=$configfile\n" if $DEBUG; return unless($configfile); --- 279,283 ---- my( $targetid, $filesetinfo, $csomefeats )= @_; ! my $configfile= $filesetinfo->{config} || ""; warn "TableWriter: target=$targetid, config=$configfile\n" if $DEBUG; return unless($configfile); Index: MyLargePrimarySeq.pm =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles/MyLargePrimarySeq.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** MyLargePrimarySeq.pm 28 Dec 2005 02:22:07 -0000 1.3 --- MyLargePrimarySeq.pm 15 Oct 2007 16:19:29 -0000 1.4 *************** *** 21,30 **** sub new { my ($class, %params) = @_; ! my $dnafile = $params{'-file'} ; ! if( $dnafile ) { delete $params{'-file'}; } my $self = $class->SUPER::new(%params); $self->dnafile($dnafile); if( $dnafile && -e $dnafile ) { ! ## $self->_filename($dnafile); # don't change to our name in case stupid wants to unlink it my $fh= new FileHandle($dnafile); $fh->seek(0,2); --- 21,29 ---- sub new { my ($class, %params) = @_; ! my $dnafile = delete $params{'-file'} ; my $self = $class->SUPER::new(%params); $self->dnafile($dnafile); if( $dnafile && -e $dnafile ) { ! ## $self->_filename($dnafile); # don't change to our name in case StUPER wants to unlink it my $fh= new FileHandle($dnafile); $fh->seek(0,2); Index: BulkWriter.pm =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles/BulkWriter.pm,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** BulkWriter.pm 28 Dec 2005 02:22:07 -0000 1.5 --- BulkWriter.pm 15 Oct 2007 16:19:29 -0000 1.6 *************** *** 92,96 **** ## add these to %ENV before reading blastfiles.xml so ${vars} get replaced .. ! my $sconfig= $self->handler()->{config}; my @keys = qw( species org date title rel relfull relid release_url ); @ENV{@keys} = @{%$sconfig}{@keys}; --- 92,96 ---- ## add these to %ENV before reading blastfiles.xml so ${vars} get replaced .. ! my $sconfig= $self->handler_config; my @keys = qw( species org date title rel relfull relid release_url ); @ENV{@keys} = @{%$sconfig}{@keys}; *************** *** 117,122 **** sub handler { return shift->{handler}; } ! sub sequtil { return shift->{handler}; } # old method; drop ! sub bulkfiles { return shift->{handler}; } # old method; drop sub outputpath --- 117,126 ---- sub handler { return shift->{handler}; } ! # sub sequtil { return shift->{handler}; } # old method; drop ! # sub bulkfiles { return shift->{handler}; } # old method; drop ! ! sub config { return shift->{config}; } ! sub handler_config { return shift->{handler}->{config}; } ! sub outputpath *************** *** 171,175 **** my($self)= @_; my $config = $self->{config}; ! my $sconfig= $self->handler()->{config}; my $oroot= $sconfig->{rootpath}; --- 175,179 ---- my($self)= @_; my $config = $self->{config}; ! my $sconfig= $self->handler_config(); my $oroot= $sconfig->{rootpath}; *************** *** 216,220 **** my $fileinfo = $self->{fileinfo} || {}; ! my $mainconf = $self->handler()->{config} || {}; # copy any release-specific additions/changes to config from mainconf --- 220,224 ---- my $fileinfo = $self->{fileinfo} || {}; ! my $mainconf = $self->handler_config() || {}; # copy any release-specific additions/changes to config from mainconf *************** *** 253,259 **** } - sub config { return shift->{config}; } - #?? sub config { my $self = shift; return (@_) ? $self->{config}->{$_[0]} : $self->{config}; } - =item getconfig(@keys) --- 257,260 ---- *************** *** 272,276 **** ## need option to choose among fileinfo, handler, default my $fileinfo = $self->{fileinfo} || {}; # 1st priority or drop??? ! my $mainconf = $self->handler()->{config} || {}; # 2nd priority; e.g. main release config my $deconfig = $self->{config} || {}; # 3rd priority; default settings --- 273,277 ---- ## need option to choose among fileinfo, handler, default my $fileinfo = $self->{fileinfo} || {}; # 1st priority or drop??? ! my $mainconf = $self->handler_config() || {}; # 2nd priority; e.g. main release config my $deconfig = $self->{config} || {}; # 3rd priority; default settings *************** *** 366,369 **** --- 367,372 ---- } } + + =item openInput( $fileset ) Index: FastaWriter.pm =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/lib/Bio/GMOD/Bulkfiles/FastaWriter.pm,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** FastaWriter.pm 12 Jan 2006 06:03:51 -0000 1.6 --- FastaWriter.pm 15 Oct 2007 16:19:29 -0000 1.7 *************** *** 33,38 **** ! # debug ! #use lib("/bio/biodb/common/perl/lib", "/bio/biodb/common/system-local/perl/lib"); use POSIX; --- 33,39 ---- ! # debug# ! use lib("/bio/argos/common/perl/lib", "/bio/argos/common/system-local/perl/lib"); ! use lib("/Users/gilbertd/bio/dev/gmod/schema/GMODTools/lib/"); use POSIX; *************** *** 49,57 **** our $DEBUG = 0; my $VERSION = "1.1"; - #my $configfile= "fastawriter"; #? BulkFiles/FastaWriter.xml ! #?? how do constants overload in perl object inheritance ?? # perldoc constant: Subclasses may .. override those in their base class. ! # BUT? need to do $obj->CONSTANT not CONSTANT ??? use constant BULK_TYPE => 'fasta'; use constant CONFIG_FILE => 'fastawriter'; --- 50,57 ---- our $DEBUG = 0; my $VERSION = "1.1"; ! #? how do constants overload in perl object inheritance ?? # perldoc constant: Subclasses may .. override those in their base class. ! # BUT need to do $obj->CONSTANT not CONSTANT use constant BULK_TYPE => 'fasta'; use constant CONFIG_FILE => 'fastawriter'; *************** *** 61,69 **** my $self= shift; $self->SUPER::init(); - $DEBUG= $self->{debug} if defined $self->{debug}; - ## super does - # $self->{bulktype}= BULK_TYPE; - # $self->{configfile}= CONFIG_FILE unless defined $self->{configfile}; } --- 61,65 ---- *************** *** 79,110 **** my($self)= @_; $self->SUPER::initData(); - - ## fastafiles -- use $self->{config} instead / also with finfo ?? - # my $outdir= $self->handler()->getReleaseSubdir( $self->getconfig('path') || $self->BULK_TYPE); - # $self->{outdir} = $outdir; - - ## $self->promoteconfigs(); << now in base class - # ##? test: see fastawriter.xml for valid keys - # my @mykeys= sort keys %{$self->{config}}; - # my %cvals= $self->getconfig(@mykeys); # debug - # @{%$self}{@mykeys}= @{%cvals}{@mykeys}; - # if($DEBUG){ - # print STDERR "### initData: getconfig(@mykeys)= \n"; - # foreach my $key (@mykeys) { - # print STDERR "$key => ",$self->{$key}," <",$cvals{$key},"\n"; - # } - # } - - ## now done by promoteconfigs() - # # my $finfo= $self->{fileinfo} || $self->handler()->getFilesetInfo($self->BULK_TYPE);# - # $self->{addids} = $finfo->{addids}; - # $self->{dropnotes} = $finfo->{dropnotes}; - # $self->{allowanyfeat} = $finfo->{allowanyfeat}; - # $self->{makeall} = $finfo->{makeall}; - # $self->{dogzip} = $finfo->{dogzip}; - # $self->{perchr} = $finfo->{perchr}; - # $self->{addmd5sum} = $finfo->{addmd5sum}; - # $self->{addcrc64} = $finfo->{addcrc64}; - } --- 75,78 ---- *************** *** 128,131 **** --- 96,103 ---- print STDERR "FastaWriter::makeFiles\n" if $DEBUG; # debug + # 0710: no_csomesplit : no perchr files, only makeall + my $no_csomesplit= $self->handler_config->{no_csomesplit} || 0; # FIXME: 0710 + my $makeall= !$no_csomesplit && !$args{noall} && $self->config->{makeall}; + # more sensible that writer should ask handler for kind of files it wants my $fileset = $args{infiles}; *************** *** 140,144 **** } ! my $featset= $self->handler->{config}->{featset} || []; #? or default set ? my $addids = defined $args{addids} ? $args{addids} : $self->getconfig('addids'); --- 112,116 ---- } ! my $featset= $self->handler_config->{featset} || []; #? or default set ? my $addids = defined $args{addids} ? $args{addids} : $self->getconfig('addids'); *************** *** 155,159 **** if ($addids) { ! my $idlist= $self->readIdsFromFFF( $inh, $chr, $self->handler()->{config}); # for featmap ? $self->{idlist}= $idlist; $inh= $self->resetInput($infile); #seek($inh,0,0); ## cant do on STDIN ! cant do on PIPE ! --- 127,131 ---- if ($addids) { ! my $idlist= $self->readIdsFromFFF( $inh, $chr, $self->handler_config()); # for featmap ? $self->{idlist}= $idlist; $inh= $self->resetInput($infile); #seek($inh,0,0); ## cant do on STDIN ! cant do on PIPE ! *************** *** 170,175 **** #? use found $chromosomes= [sort keys %chrset] ; want to keep original sort order ! $self->makeall( $chromosomes, $featset) ! if (!$args{noall} && $self->config->{makeall} && $status > 0) ; print STDERR "FastaWriter::makeFiles: done n=$status\n" if $DEBUG; --- 142,146 ---- #? use found $chromosomes= [sort keys %chrset] ; want to keep original sort order ! $self->makeall( $chromosomes, $featset) if ($makeall && $status > 0) ; print STDERR "FastaWriter::makeFiles: done n=$status\n" if $DEBUG; *************** *** 184,188 **** my @features= @$featset; $chromosomes= $self->handler()->getChromosomes() unless (ref $chromosomes); ! foreach my $featn (@features) { --- 155,161 ---- my @features= @$featset; $chromosomes= $self->handler()->getChromosomes() unless (ref $chromosomes); ! my $perchr= (defined $self->config->{perchr} && $self->config->{perchr} == 0); ! # fixme: no_csomesplit ! foreach my $featn (@features) { *************** *** 209,213 **** while (<$fh>) { print $allfh $_; } close($fh); ! unlink $fn if (defined $self->config->{perchr} && $self->config->{perchr} == 0); } close($allfh); --- 182,186 ---- while (<$fh>) { print $allfh $_; } close($fh); ! unlink $fn if ($perchr); } close($allfh); *************** *** 290,313 **** my $outh= {}; my $outdir= $self->outputpath(); ! my @features= @$featset; - my @dbfeatures= (); $self->{diddbfa}= {} unless($self->{diddbfa}); ! my $featmap = $self->handler->config->{featmap}; # NOT local config ; for main config's featmap - # special case for feat == chromosome/dna -> raw2Fasta # add something like this, but dump direct from db, for EST, reagent seqs w/o csome loc ! my @fffeatures= @features; ! if (my ($featn)= grep /^chromosome/, @features) { ! my $fn= $self->get_filename ( $self->{org}, $chr, 'chromosome', $self->{rel}, $self->BULK_TYPE); ! $fn= catfile($outdir, $fn); ! $self->raw2Fasta( chr => $chr, fastafile => $fn); $ndone++; ! @fffeatures= grep !/^chromosome/, @features; ! } ! foreach my $featn (@fffeatures) { ! my $fn= $self->get_filename ( $self->{org}, $chr, $featn, $self->{rel}, $self->BULK_TYPE); $fn= catfile( $outdir, $fn); ! $outh->{$featn}= new FileHandle(">$fn"); ## check featmap for db vs fff features !? --- 263,295 ---- my $outh= {}; my $outdir= $self->outputpath(); ! ## my @features= @$featset; ! my @fffeatures= @$featset; ! print STDERR "procFasta featset=",join(",",@fffeatures),"\n" if $DEBUG; $self->{diddbfa}= {} unless($self->{diddbfa}); ! my $featmap = $self->handler_config->{featmap}; # NOT local config ; for main config's featmap # add something like this, but dump direct from db, for EST, reagent seqs w/o csome loc ! # 200710: no_csomesplit update: no dna/ files, draw all seq from chadodb ! my $no_csomesplit= $self->handler_config->{no_csomesplit} || 0; # FIXME: 0710 ! my $write = ($no_csomesplit) ? ">>" : ">"; ! my $chrout= ($no_csomesplit) ? "all" : $chr; ! my @dbfeatures= (); ! my @ffflist= @fffeatures; ! foreach my $featn (@ffflist) { ! my $fn= $self->get_filename ( $self->{org}, $chrout, $featn, $self->{rel}, $self->BULK_TYPE); $fn= catfile( $outdir, $fn); ! print STDERR "procFasta $featn outh=$fn\n" if $DEBUG; ! ! ## special case for feat == chromosome/dna -> raw2Fasta ! if(!$no_csomesplit and $featn eq "chromosome") { ! $self->raw2Fasta( chr => $chr, fastafile => $fn) unless($self->{diddbfa}->{$featn}); ! $self->{diddbfa}->{$featn}++; $ndone++; ! @fffeatures= grep !/^$featn$/, @fffeatures; ! next; ! } ! ! $outh->{$featn}= new FileHandle("$write$fn"); # no_csomesplit: append dont create here ## check featmap for db vs fff features !? *************** *** 316,320 **** push(@dbfeatures, $featn) unless($self->{diddbfa}->{$featn}); $self->{diddbfa}->{$featn}++; ! @fffeatures= grep !/^$featn/, @fffeatures; } } --- 298,302 ---- push(@dbfeatures, $featn) unless($self->{diddbfa}->{$featn}); $self->{diddbfa}->{$featn}++; ! @fffeatures= grep !/^$featn$/, @fffeatures; } } *************** *** 336,366 **** - # =item readIdsFromFFF - # - # pre-read ids from fff input for selected features for others to add_id or filter by id - # moved to base class for reuse - # - # =cut - # - # sub readIdsFromFFF - # { - # my $self= shift; - # my ($fffin,$chr,$config)= @_; - # my $idlist= {}; - # my $types_info= $config->{featmap}; - # my $nid=0; - # - # while(<$fffin>) { - # next unless(/^\w/); chomp; - # my ($type,$name,$cytomap,$baseloc,$id,$dbxref,$notes,$chr1) - # = $self->handler()->splitFFF($_, $chr); - # if ($types_info->{$type}->{get_id}) { $idlist->{$id}= $dbxref; $nid++; } - # } - # print STDERR "read ids n=$nid\n" if $DEBUG; - # return $idlist; - # } - - - sub writeheader { --- 318,321 ---- *************** *** 448,454 **** my $self= shift; my ( $fffin, $outh, $chrIn, $featset )= @_; ! my ( $lastchr ); my $nout= 0; ! my $sconfig= $self->handler->{config}; $self->{ffformat}= 0; my %lastfff= (); --- 403,409 ---- my $self= shift; my ( $fffin, $outh, $chrIn, $featset )= @_; ! my $lastchr=""; my $nout= 0; ! my $sconfig= $self->handler_config; $self->{ffformat}= 0; my %lastfff= (); *************** *** 461,464 **** --- 416,422 ---- : 0; + print STDERR "fastaFromFFFloop: ft=".join(",",@$featset),"\n" if $DEBUG; + my $ndebug=0; + while(<$fffin>) { next unless(/^\w/); chomp; *************** *** 482,495 **** foreach my $featn (@features) { ! ## this loop is tricky - print each input fff only once UNLESS special ! ## case of showing in other types_ok ($type,$name,$cytomap,$baseloc,$id,$dbxref,$notes,$chr)= @fvals; - my @fnotes= @notes; my($types_ok,$retype,$usedb,$subrange,$types_info) = $self->get_feature_set( $featn, $sconfig, $allowanyfeat); ! next unless( ($types_ok && $types_ok->{$type}) || ($allowanyfeat && !$didfeat) ); ! $self->{use_dbmd5}= $usedb; #? want sep. flag in featmap.xml ? --- 440,461 ---- foreach my $featn (@features) { ! ## this loop is tricky - print each input fff only once ! ## UNLESS special case of showing in other types_ok ($type,$name,$cytomap,$baseloc,$id,$dbxref,$notes,$chr)= @fvals; my($types_ok,$retype,$usedb,$subrange,$types_info) = $self->get_feature_set( $featn, $sconfig, $allowanyfeat); ! ! my $fah= $outh->{$featn}; ! my $hasout = ($fah) ? 1 : 0; ! my $goodfeat= ( ($types_ok && $types_ok->{$type}) || ($allowanyfeat && !$didfeat) ); ! ! # ## why cant we get gene featset now ?? ! # if($featn =~ /^gene/){ ! # print STDERR "# fffa loop: $featn>$type ok=$goodfeat did=$didfeat out=$hasout\n" if $DEBUG and 100>$ndebug++; ! # } ! next unless($goodfeat and $hasout); ! $self->{use_dbmd5}= $usedb; #? want sep. flag in featmap.xml ? *************** *** 513,522 **** } - my $fah= $outh->{$featn}; - unless($fah) { next; } #?? - my ($start,$stop,$strand)= $self->handler()->maxrange($baseloc); my $shortloc= ($stop<0) ? $baseloc : ($strand<0) ? "complement($start..$stop)" : "$start..$stop"; # option for full/short loc in header? print STDERR "getBases id=$id type=$type chr=$chr loc=$baseloc\n" if $DEBUG>2; --- 479,486 ---- } my ($start,$stop,$strand)= $self->handler()->maxrange($baseloc); my $shortloc= ($stop<0) ? $baseloc : ($strand<0) ? "complement($start..$stop)" : "$start..$stop"; # option for full/short loc in header? + my @fnotes= @notes; print STDERR "getBases id=$id type=$type chr=$chr loc=$baseloc\n" if $DEBUG>2; *************** *** 526,554 **** $types_info->{dotranslate}); ## add optional md5checksum, SwissProt CRC64 calcs; ## check if last getBases returned md5checksum my @crcs= $self->getCRCs( $id, \$bases, \@fnotes); ! my $header= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, chr => $chr, location => $shortloc, ID => $id, db_xref => $dbxref, $org ? (species => $org) : (), $rel ? (release => $rel) : (), @fnotes, @crcs, ); ! if ($bases) { ! my $slen= length($bases); $bases =~ s/(.{1,50})/$1\n/g; ! print $fah ">$header; len=$slen\n",$bases; $nout++; } else { ! warn "ERROR: missing bases for $header\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $chrIn $featset \n"; return undef; } # write at least one dummy base so user soft wont screw up ! print $fah ">$header; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } --- 490,521 ---- $types_info->{dotranslate}); + my $seqlen= $bases ? length($bases) : 0; ## add optional md5checksum, SwissProt CRC64 calcs; ## check if last getBases returned md5checksum my @crcs= $self->getCRCs( $id, \$bases, \@fnotes); ! my $defline= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, chr => $chr, location => $shortloc, ID => $id, db_xref => $dbxref, $org ? (species => $org) : (), $rel ? (release => $rel) : (), + len => $seqlen, @fnotes, @crcs, ); ! if ($bases) { ! # my $slen= length($bases); $bases =~ s/(.{1,50})/$1\n/g; ! print $fah ">$defline\n"; # ; len=$slen ! print $fah $bases; $nout++; } else { ! warn "ERROR: missing bases for $defline\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $chrIn $featset \n"; return undef; } # write at least one dummy base so user soft wont screw up ! print $fah ">$defline; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } *************** *** 579,584 **** if ($md5) { # maybe calc and compare if want to verify ? ! } else { ! require Digest::MD5; my $md5sum= Digest::MD5->new; if($md5sum) { --- 546,551 ---- if ($md5) { # maybe calc and compare if want to verify ? ! } elsif(ref($basesref)) { ! require Digest::MD5; #? problem my $md5sum= Digest::MD5->new; if($md5sum) { *************** *** 591,595 **** } ! if ($addcrc64) { require Bio::GMOD::Bulkfiles::SWISS_CRC64; my $crc64sum= SWISS_CRC64->new; --- 558,562 ---- } ! if ($addcrc64 and ref($basesref)) { require Bio::GMOD::Bulkfiles::SWISS_CRC64; my $crc64sum= SWISS_CRC64->new; *************** *** 659,688 **** $types_info->{dotranslate}); ## add optional md5checksum, SwissProt CRC64 calcs; my @crcs= $self->getCRCs( $id, \$bases, \@notes); ! my $header= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, chr => $chr, location => $shortloc, ID => $id, db_xref => $dbxref, $org ? (species => $org) : (), $rel ? (release => $rel) : (), @notes, @crcs, ); if ($bases) { ! my $slen= length($bases); $bases =~ s/(.{1,50})/$1\n/g; ! return ">$header; len=$slen\n".$bases; } else { ! warn "ERROR: missing bases for $header\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $featset \n"; return undef; } # write at least one dummy base so user soft wont screw up ! return ">$header; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } } =item fastaFromDb --- 626,659 ---- $types_info->{dotranslate}); + my $seqlen= $bases ? length($bases) : 0; ## add optional md5checksum, SwissProt CRC64 calcs; my @crcs= $self->getCRCs( $id, \$bases, \@notes); ! my $defline= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, chr => $chr, location => $shortloc, ID => $id, db_xref => $dbxref, $org ? (species => $org) : (), $rel ? (release => $rel) : (), + len => $seqlen, @notes, @crcs, ); if ($bases) { ! #my $slen= length($bases); $bases =~ s/(.{1,50})/$1\n/g; ! return ">$defline\n".$bases; # ; len=$slen } else { ! warn "ERROR: missing bases for $defline\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $featset \n"; return undef; } # write at least one dummy base so user soft wont screw up ! return ">$defline; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } } + + =item fastaFromDb *************** *** 751,755 **** my $feature_id = $$nextrow{'feature_id'}; my $bases= $$nextrow{'residues'}; ! my $seqlen= length($bases); ## $$nextrow{'seqlen'}; # my ($start,$stop,$strand)= (1,$seqlen,0); --- 722,726 ---- my $feature_id = $$nextrow{'feature_id'}; my $bases= $$nextrow{'residues'}; ! my $seqlen= length($bases) || 0; ## $$nextrow{'seqlen'}; # my ($start,$stop,$strand)= (1,$seqlen,0); *************** *** 771,775 **** } ! my $header= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, ID => $id, db_xref => $dbxref, --- 742,746 ---- } ! my $defline= $self->fastaHeader( type => $retype->{$type}||$type, name => $name, ID => $id, db_xref => $dbxref, *************** *** 777,796 **** $org ? (species => $org) : (), $rel ? (release => $rel) : (), @notes, @crcs, ); ! print STDERR "fastaFromDb[$nout]=$header\n" if ($DEBUG && $nout<4); if ($bases) { $bases =~ s/(.{1,50})/$1\n/g; ! print $outhandle ">$header; len=$seqlen\n".$bases; } else { ! warn "ERROR: missing bases for $header\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $featset \n"; return -1; } # write at least one dummy base so user soft wont screw up ! print $outhandle ">$header; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } --- 748,769 ---- $org ? (species => $org) : (), $rel ? (release => $rel) : (), + len => $seqlen, @notes, @crcs, ); ! print STDERR "fastaFromDb[$nout]=$defline\n" if ($DEBUG && $nout<4); if ($bases) { $bases =~ s/(.{1,50})/$1\n/g; ! print $outhandle ">$defline\n"; # ; len=$seqlen ! print $outhandle $bases; } else { ! warn "ERROR: missing bases for $defline\n"; if ($self->handler()->{failonerror}) { warn "FAILING: $featset \n"; return -1; } # write at least one dummy base so user soft wont screw up ! print $outhandle ">$defline; ERROR missing data\nN\n" if($self->config->{writeemptyrecords}); #? write to file or not } *************** *** 889,894 **** warn "raw2Fasta: wont overwrite $fastafile"; return $fastafile; } ! my $ap= ($append) ? ">>" : ">"; ! my $outh= new FileHandle("$ap$fastafile"); my $org= $self->handler()->{config}->{org}; my $rel= $self->handler()->{config}->{rel}; --- 862,867 ---- warn "raw2Fasta: wont overwrite $fastafile"; return $fastafile; } ! my $write= ($append) ? ">>" : ">"; ! my $outh= new FileHandle("$write$fastafile"); my $org= $self->handler()->{config}->{org}; my $rel= $self->handler()->{config}->{rel}; *************** *** 906,910 **** unless ($end>=$start) { $end= $start; } # what ? my $id= ($fullchr) ? $chr : "$chr:$start..$end"; ! #?? add: $self->getCRCs( $id, \$bases, \@notes); ## but need to read dna 1st; revise crc to do add-lines --- 879,883 ---- unless ($end>=$start) { $end= $start; } # what ? my $id= ($fullchr) ? $chr : "$chr:$start..$end"; ! my $seqlen= 1 + $end - $start; #?? add: $self->getCRCs( $id, \$bases, \@notes); ## but need to read dna 1st; revise crc to do add-lines *************** *** 917,920 **** --- 890,894 ---- $org ? (species => $org) : (), $rel ? (release => $rel) : (), + len => $seqlen, ) unless $defline; *************** *** 933,937 **** else { ! unless ($end>=$start) { $end= $start; } # what ? $defline= $self->fastaHeader( ID => "$chr:$start..$end", --- 907,911 ---- else { ! unless ($end>=$start) { $end= $start; } # what ? $defline= $self->fastaHeader( ID => "$chr:$start..$end", *************** *** 941,946 **** $org ? (species => $org) : (), $rel ? (release => $rel) : (), ) unless $defline; ! print $outh ">$defline\n"; } print $outh "\n"; --- 915,921 ---- $org ? (species => $org) : (), $rel ? (release => $rel) : (), + len => 0, ) unless $defline; ! print $outh ">$defline\nN\n"; # one N base for empties } print $outh "\n"; *************** *** 983,987 **** my @retype= (); my $type_info= {}; ! $config = $self->handler->{config} unless($config); if(!$config->{featmap}->{$featset} && $featset =~ /^(\w+)_extended(\d+)$/) { --- 958,962 ---- my @retype= (); my $type_info= {}; ! $config = $self->handler_config unless($config); if(!$config->{featmap}->{$featset} && $featset =~ /^(\w+)_extended(\d+)$/) { *************** *** 991,998 **** if (defined $config->{featmap}->{$featset}) { my $fm= $config->{featmap}->{$featset}; ! @ft= split(/[\s,;]/, $fm->{types} || $featset ); #? @{$fm->{types}}; @retype= split(/[\s,;]/, $fm->{typelabel}) if ($fm->{typelabel}); $fromdb= $fm->{fromdb} || 0; $subrange= $fm->{subrange} || $subrange; if ($fm->{method} eq 'between') { $fm->{proc}= '&intergeneFromFFF2'; ## FIXME --- 966,974 ---- if (defined $config->{featmap}->{$featset}) { my $fm= $config->{featmap}->{$featset}; ! @ft= split(/[\s,;]/, $fm->{types} || $featset ); @retype= split(/[\s,;]/, $fm->{typelabel}) if ($fm->{typelabel}); $fromdb= $fm->{fromdb} || 0; $subrange= $fm->{subrange} || $subrange; + $fm->{method} ||= ""; if ($fm->{method} eq 'between') { $fm->{proc}= '&intergeneFromFFF2'; ## FIXME *************** *** 1002,1005 **** --- 978,982 ---- } else { + $type_info->{method} ||= ""; CASE: { $featset =~ /^(gene|pseudogene)$/ && do { @ft=($featset); $type_info->{get_id}=1; last CASE; }; *************** *** 1021,1024 **** --- 998,1002 ---- } + foreach (@ft) { if(s/^(["'])//){s/$1$//} } $fromdb= 0 if $self->handler()->{ignoredbresidues}; my %types_ok= map { $_,1; } @ft; |