From: Don G. <don...@us...> - 2005-02-08 20:05:08
|
Update of /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5226/src/org/gmod/chado/ix Modified Files: IxReadSax.pm ToAcode.pm Log Message: dpse/synteny updates Index: IxReadSax.pm =================================================================== RCS file: /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix/IxReadSax.pm,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** IxReadSax.pm 19 Apr 2004 21:23:16 -0000 1.4 --- IxReadSax.pm 8 Feb 2005 20:03:59 -0000 1.5 *************** *** 49,52 **** --- 49,53 ---- rearranged to make them a bit more usable objects. + =head1 SEE ALSO *************** *** 75,87 **** use XML::Parser::PerlSAX; use Exporter; ! use vars qw/$VERSION $ROOT_NODE $debug @skipKeys @ISA @EXPORT /; @ISA = qw(Exporter); @EXPORT = qw(&view); ! $VERSION = "0.5"; $ROOT_NODE='chado'; $debug= 0; BEGIN { # some of these are useful - later // timelastmodified # dec03 - drop residues from skipset - let user decide w/ -skip opt --- 76,94 ---- use XML::Parser::PerlSAX; use Exporter; ! use vars qw/$VERSION $ROOT_NODE $debug @skipKeys $featmainpatt $featgenpatt @ISA @EXPORT /; @ISA = qw(Exporter); @EXPORT = qw(&view); ! $VERSION = "0.6"; $ROOT_NODE='chado'; $debug= 0; BEGIN { + $featmainpatt='feature|feature_evidence|prediction_evidence|alignment_evidence'; + $featgenpatt='analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism'; + ## jan05 added feature_relationship + # /^(analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism |feature_relationship)$/ && do { + # analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism + # some of these are useful - later // timelastmodified # dec03 - drop residues from skipset - let user decide w/ -skip opt *************** *** 98,104 **** is_fmin_partial is_fmax_partial evidence_id - organism_id _appdata /; } --- 105,111 ---- is_fmin_partial is_fmax_partial evidence_id _appdata /; + # organism_id } *************** *** 256,259 **** --- 263,273 ---- } + sub parent_node { + my $self= shift; + my $upto= shift; $upto= -1 unless(defined $upto); + my $parft= @{$self->{featstack}}[$upto]; + return $parft; + } + sub getTopNode; *getTopNode = \&topnode; # java alias *************** *** 287,295 **** added fmin, fmax replacements for nbeg, nend need to catch these containers: ! feature_synonym feature_relationship feature_dbxref ? feature_cvterm ? feature_synonym > should become an attribute of parent feature ? (encloses synonym, synonym_id tags) =item XML handlers --- 301,319 ---- added fmin, fmax replacements for nbeg, nend need to catch these containers: ! feature_synonym feature_dbxref ? feature_cvterm ? feature_synonym > should become an attribute of parent feature ? (encloses synonym, synonym_id tags) + JAN 05: + for synteny features, handle 'gene_syntenic_block' like + prediction_evidence alignment_evidence ? + + need feature_relationship for cv type_id = putative_ortholog_of + with data in object_id/feature/ + feature_relationship/ + object_id/feature/ organism=xxx, cvterm=gene, uniquename=geneID + type_id/ name=putative_ortholog_of + =item XML handlers *************** *** 342,346 **** $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; ! ## test sep03 if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) { $self->{handleObj}->handleObj(0, $ft, $err); --- 366,370 ---- $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; ! ## sep03 if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) { $self->{handleObj}->handleObj(0, $ft, $err); *************** *** 370,375 **** feature feature_evidence prediction_evidence alignment_evidence analysis feature_cvterm cvterm cv db pub ! synonym organism ! dbxref featureprop feature_synonym feature_dbxref ); --- 394,399 ---- feature feature_evidence prediction_evidence alignment_evidence analysis feature_cvterm cvterm cv db pub ! synonym organism feature_relationship ! dbxref featureprop feature_synonym feature_dbxref ); *************** *** 388,396 **** $self->{elcount}{$_}++; # debug only return if ($self->{skipkids}); ! my $nada= 0; SWITCH: { ! /^($ROOT_NODE|feature|feature_evidence|prediction_evidence|alignment_evidence|analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism)$/ && do { my $atid= $element->{Attributes}->{id}; --- 412,420 ---- $self->{elcount}{$_}++; # debug only return if ($self->{skipkids}); ! my $nada= 0; SWITCH: { ! /^($ROOT_NODE|$featmainpatt|$featgenpatt|feature_relationship)$/ && do { # my $atid= $element->{Attributes}->{id}; *************** *** 400,410 **** if (/$ROOT_NODE/ && $self->{$ROOT_NODE}) { $ft= $self->{$ROOT_NODE}; - # $self->{idhash}{$atid}= $self->{$ROOT_NODE} if $atid; - # last SWITCH; } else { $ft= new org::gmod::chado::ix::IxFeat( tag => $_, id => $atid, handler => $self); ! } $self->{$_}= $ft; --- 424,432 ---- if (/$ROOT_NODE/ && $self->{$ROOT_NODE}) { $ft= $self->{$ROOT_NODE}; } else { $ft= new org::gmod::chado::ix::IxFeat( tag => $_, id => $atid, handler => $self); ! } $self->{$_}= $ft; *************** *** 416,420 **** # these are main feature objects... ! if (/^($ROOT_NODE|feature|feature_evidence|prediction_evidence|alignment_evidence)$/) { #|analysis $self->{curfeat}= $ft; push( @{$self->{featstack}}, $ft); --- 438,442 ---- # these are main feature objects... ! if (/^($ROOT_NODE|$featmainpatt)$/) { #|analysis $self->{curfeat}= $ft; push( @{$self->{featstack}}, $ft); *************** *** 424,427 **** --- 446,452 ---- $self->{srcfeature_id}= $atid; # save } + if (/^feature$/ && $elpar eq 'object_id') { #?? for odd feature_relationship + $self->{object_id}= $atid; # save + } last SWITCH; *************** *** 433,445 **** }; - # # handle cv_id ref to cv - has id attr if cv/cvname is defined - # /^(cv_id)$/ && do { - # my $atid= $element->{Attributes}->{id}; - # if ($atid) { - # # my $cv= $self->{cvlist}{$atid}; #?? - # # my $cvset= $cv->get('cvname'); - need to prefix cvterm/name w/ cvset? - # } - # last SWITCH; - # }; /^(featureloc)$/ && do { --- 458,461 ---- *************** *** 459,462 **** --- 475,479 ---- ## feature_cvterm is handled ok by nested cvterm/dbxref == GO terms + ids ## or handle like featureprop ? + ## organism| was here.. see above genfeat /^(dbxref|featureprop|feature_synonym|feature_dbxref)$/ && do { *************** *** 510,517 **** /^(type_id)$/ && do { # //CAN BE INSIDE pub OR feature OR feature_relationship OR synonym ! if ($hasval ) { # && $val ne 'contains' ! if ($elpar eq 'feature_relationship') { ! ## skip } elsif ($elpar =~ /^(dbxref|featureprop|feature_synonym|feature_dbxref)$/) { ## attributes now use type_id --- 527,544 ---- /^(type_id)$/ && do { # //CAN BE INSIDE pub OR feature OR feature_relationship OR synonym ! if ( $hasval ) { # && $val ne 'contains' ! ! ##if ($elpar eq 'feature_relationship') { ! if ($self->{curgenfeat}->{tag} =~ /(feature_relationship)/) { ! ## skip? jan05 : need some of these ! ##$self->{$elpar}->set( $_ => $val ) ; ! #if ( $self->{curgenfeat}->get('object_id') ) { ! $self->{curgenfeat}->set( $_ => $val ) ; ! #} } + # elsif ($elpar =~ /^(feature_relationship)$/) { + # ## skip? jan05 : need some of these + # } + elsif ($elpar =~ /^(dbxref|featureprop|feature_synonym|feature_dbxref)$/) { ## attributes now use type_id *************** *** 536,541 **** last SWITCH; }; ! /^(name|uniquename|miniref|program|sourcename|programversion|seqlen|cvterm_id|analysis_id|timelastmodified)$/ && do { ## general feature values - should be only 1/feature if ($hasval) { if ($self->{curgenfeat}) { --- 563,569 ---- last SWITCH; }; ! /^(name|uniquename|miniref|program|sourcename|programversion|seqlen|cvterm_id|object_id|analysis_id|timelastmodified)$/ && do { ## general feature values - should be only 1/feature + unless( $hasval ) { $val= $self->{$_}; $hasval= ($val =~ /\S/); } if ($hasval) { if ($self->{curgenfeat}) { *************** *** 566,569 **** --- 594,602 ---- last SWITCH; }; + /^(genus|species)$/ && do { + if ($hasval && $self->{organism}) { + $self->{organism}->set( $_ => $val ); + } + last SWITCH; }; ## //PUTTING PARAMETERS INTO OBJECTS *************** *** 572,576 **** # synonym not top level... ! /^(organism|pub|cvterm|cv|db|analysis|feature_cvterm|synonym)$/ && do { ## // top level chado params ## here, if cvterm and cv_id has id attrib, add link to cv? --- 605,609 ---- # synonym not top level... ! /^($featgenpatt|feature_relationship)$/ && do { ## // top level chado params ## here, if cvterm and cv_id has id attrib, add link to cv? *************** *** 585,590 **** $self->{vals}{$elpar}= $ft->{id}; } ! ! if (/(synonym|feature_cvterm)$/) { $self->{curfeat}->add($ft); } --- 618,628 ---- $self->{vals}{$elpar}= $ft->{id}; } ! ! if (/(feature_relationship)$/) { # ! my ($tag,$rtype) = $self->{$ROOT_NODE}->id2name('type_id', $ft->{type_id}); ! $self->{curfeat}->add($ft) ! if (scalar(%$ft) && $rtype !~ /^(partof|producedby)$/); ! } ! elsif (/(synonym|feature_cvterm)$/) { $self->{curfeat}->add($ft); } *************** *** 599,605 **** last SWITCH; }; ! /^(feature|feature_evidence|prediction_evidence|alignment_evidence)$/ && do { #|analysis my $ft= pop( @{$self->{featstack}}); my $parft= @{$self->{featstack}}[-1]; $parft->add($ft); $self->{$_}= undef; --- 637,644 ---- last SWITCH; }; ! /^($featmainpatt)$/ && do { #|analysis my $ft= pop( @{$self->{featstack}}); my $parft= @{$self->{featstack}}[-1]; + $ft->{parnode}= $parft; #?? $parft->add($ft); $self->{$_}= undef; *************** *** 609,613 **** $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; ! ## test sep03 if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) { $self->{handleObj}->handleObj(0, $ft); --- 648,652 ---- $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; ! ## sep03 if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) { $self->{handleObj}->handleObj(0, $ft); *************** *** 655,683 **** last SWITCH; }; - ## //ATTRIB - - # /^(feature_dbxref)$/ && do { - # $self->{curfeat}->addattr($self->{$_}); - # $self->{$_}= undef; - # last SWITCH; }; - - # /^(dbxref_id)$/ && do { - # my $attr= $self->{$_}; - # $attr->setattr( $val) if $hasval; #? and not get() ? - # - # # my $fattr= $self->{feature_dbxref}; - # # if ($fattr) { - # # $fattr->set( attr => $attr); - # # # //THIS dbxref_id IS INSIDE A feature_dbxref - # # # //IT IS NOW SUBSUMED - # # } - # # elsif - # if ( $self->{curfeat} ) { - # # //ADD TO FEATURE - # $self->{curfeat}->addattr($attr); - # } - # $self->{$_}= undef; - # last SWITCH; }; - /^(dbxref)$/ && do { # now also in attrstack --- 694,697 ---- *************** *** 696,706 **** $self->setAttrs($attr, qw(is_current is_internal is_analysis)); - # my $is_current= $self->{is_current}; - # $attr->set( is_current => $is_current) if defined $is_current; - # $self->{is_current}= undef; - # my $dbxattr= $self->{dbxref_id}; - # if ($dbxattr) { $dbxattr->setattr( $attr ); } - # elsif if ( $self->{curgenfeat} ) { $self->{curgenfeat}->addattr($attr); --- 710,714 ---- *************** *** 719,722 **** --- 727,731 ---- $self->{$_}= undef; last SWITCH; }; + /^(feature_synonym)$/ && do { ## or synonym -- done by synonym == par->add(ft) *************** *** 727,745 **** ## synonym_id always seems to enclose only synonym struct - # # my $synonym_id= $self->{synonym_id}; - # $attr->set( synonym_id => $synonym_id) if $synonym_id; - # $self->{synonym_id}= undef; - # ##?? change id=feature_synonym_1894 to id=synonym_1894 - # # ## synonym struct is: id,name,type_id - # # my $synonym= $self->{synonym}; - # # $attr->set( synonym => $synonym) if $synonym; - # - # my $pub_id= $self->{pub_id}; - # $attr->set( pub_id => $pub_id) if $pub_id; - # - # my $is_current= $self->{is_current}; - # $attr->set( is_current => $is_current) if defined $is_current; - # $self->{is_current}= undef; if ( $self->{curgenfeat} ) { # was curfeat --- 736,740 ---- Index: ToAcode.pm =================================================================== RCS file: /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix/ToAcode.pm,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** ToAcode.pm 19 Apr 2004 21:23:16 -0000 1.4 --- ToAcode.pm 8 Feb 2005 20:04:17 -0000 1.5 *************** *** 82,85 **** --- 82,88 ---- use Getopt::Long; use constant IDXRECSIZE => length(pack("NN", 1, 50000)); # store as unsigned long, unsigned long + + + ## FBTI_IDBASE is no longer valid .. aug04 use constant FBTI_IDBASE => 50000; ## FIXME - quick fix for TE/FBti id in FBan form *************** *** 112,115 **** --- 115,123 ---- $GeneDbName= 'FlyBase:'; + # my $sppabbrev={ + # Drosophila_pseudoobscura => 'dpse', + # Drosophila_melanogaster => 'dmel', + # }; + @transcript_types= qw( \w+RNA protein pseudogene ); @non_transcript_types = qw(DNA_motif aberration_junction enhancer insertion_site *************** *** 117,121 **** rescue_fragment sequence_variant ); ! @ignore_feature_types = qw(exon chromosome_arm); $transcript_types= join('|', @transcript_types); --- 125,129 ---- rescue_fragment sequence_variant ); ! @ignore_feature_types = qw(exon chromosome_arm ultra_scaffold golden_path_region); $transcript_types= join('|', @transcript_types); *************** *** 152,164 **** sub acode { ! ! my $outfh= *STDOUT; ! my $ftout= undef; ! my $faout= undef; my $fnout= undef; ! my $outf= undef; ! my $ftoutf= undef; my $doindex= 0; my @skipf=(); - my $fbgn2id= undef; my $optok= Getopt::Long::GetOptions( --- 160,170 ---- sub acode { ! ! # file names ! my ($outf, $outte, $ftout, $fbgn2id ); ! # file 'handles' ! my ($outfh, $outteh, $ftoutf, $faout, $fnout); my $doindex= 0; my @skipf=(); my $optok= Getopt::Long::GetOptions( *************** *** 166,169 **** --- 172,176 ---- 'index!' => \$doindex, 'outfile=s' => \$outf, + 'tefile=s' => \$outte, 'featfile=s' => \$ftoutf, 'fbgn2id=s' => \$fbgn2id, *************** *** 205,208 **** --- 212,216 ---- } + $outfh= *STDOUT; if ($outf) { # if (-r $outf) -- what ? append? *************** *** 223,229 **** $fnout= *MRNAF; } ! my $outhand= new org::gmod::chado::ix::ToAcode( outh => $outfh, ftout => $ftout, faout => $faout, --- 231,249 ---- $fnout= *MRNAF; } ! ! unless ($outte) { ! if ($outf) { ($outte= $outf) =~ s/\./-TE\./; } ! # transposable_element have separate ID class - FBti ! elsif ($ftoutf) { $outte= $ftoutf; $outte =~ s/\.[\.\w]*$/-TE\.acode/; } ! } ! if ($outte) { ! open(OUTTE,">>$outte") or die "error writing $outte"; ! $outteh= *OUTTE; ! } ! else { $outteh= *STDOUT;} ! my $outhand= new org::gmod::chado::ix::ToAcode( outh => $outfh, + outteh => $outteh, ftout => $ftout, faout => $faout, *************** *** 242,245 **** --- 262,266 ---- close($outfh) if ($outf); + close($outteh) if ($outteh); close($ftout) if ($ftout); close($faout) if $faout; *************** *** 247,252 **** if ($doindex && $outf) { ! @ARGV= ($outf); ! acodeindex(); } } --- 268,275 ---- if ($doindex && $outf) { ! indexAcode( $outf, "FBan", "FBgn"); ! } ! if ($doindex && $outte) { ! indexAcode( $outte, "FBti"); } } *************** *** 267,270 **** --- 290,297 ---- unless(-r $file) { die "acodeindex: cannot index $file\n"; } indexAcode( $file, "FBan", "FBgn"); + + ## dang FBti problem + (my $outte= $file) =~ s/\./-TE\./; # transposable_element have separate ID class - FBti + if (-e $outte) { indexAcode( $outte, "FBti"); } } *************** *** 290,293 **** --- 317,321 ---- $self->{tag}= 'ToAcode' unless (exists $self->{tag} ); $self->{outh}= *STDOUT unless (exists $self->{outh} ); + $self->{outteh}= *STDOUT unless (exists $self->{outteh} ); $self->{VERS}= $DATA_VERSION unless (exists $self->{VERS} ); $self->{ftd}= {}; *************** *** 295,298 **** --- 323,327 ---- $self->{mrnaresid}= {}; $self->{evd}= {}; + $self->{synt}= {}; $debug= $self->{debug} if $self->{debug}; } *************** *** 342,346 **** --- 371,414 ---- } + ## for IxBase + ## my($tag,$forg)= $self->orgid2name($dft,'organism_id', $dft->{organism_id}); + sub orgid2name { + my $self= shift; + my ($ob, $k,$v)= @_; + if ($k =~ /_id$/) { + my $ft= $ob->{handler}->{idhash}{$v}; + if ($ft) { + ##_debugObj("organism",$ft); + #my @attr= $ft->getattrs('genus','species'); + my @attr=( $ft->{'genus'}, $ft->{'species'} ); + if (@attr) { + # $v= join('_',@attr); + $v= substr(lc($ft->{'genus'}),0,1) . substr(lc($ft->{'species'}),0,3); + $k =~ s/_id$/_name/; + } + + + # my $gns= $ft->{'genus'}; my $spp= $ft->{'species'}; + #if ($gns || $spp) { + # $v= $gns .'_' . $spp; ## need configurable abbrevs. of species ! + # $k =~ s/_id$/_name/; + # } + } + } + return ($k,$v); + } + + sub _debugObj + { + my ($name,$obj)= @_; + require Data::Dumper; + $Data::Dumper::Maxdepth = 3; + my $dd = Data::Dumper->new([$obj]); $dd->Terse(1); + $dd->Seen( [ { '*handler' => 1} ]); + #$dd->Maxdepth(3); + + print STDERR "DEBUG obj: $name=", $dd->Dump(),"\n"; + } # sub getMiscFeatureLoc { *************** *** 370,377 **** --- 438,463 ---- =cut + use vars qw/ $lastob $parob /; + sub handleObj { my $self = shift; my $depth= shift; my $ob = shift; + eval { $self->handleObj_base($depth,$ob,@_); }; + if ($@) { + my $err=$@; + my $lob= (ref($lastob)) ? $lastob->name() : 'null'; # $ob->name(); + warn "#>> handleObj lastob=$lob; lev=$depth; error: $err\n"; + _debugObj("object",$ob); + ##_debugObj("last-object",$lastob); + ## DEBUG obj: object={} + } + $lastob= $ob; + } + + sub handleObj_base { + my $self = shift; + my $depth= shift; + my $ob = shift; my $parsestate= shift; #? skip these *************** *** 391,394 **** --- 477,481 ---- my $ftd= $self->{ftd}; my $evd= $self->{evd}; + ##my $synt= $self->{synt}; # like evd; but needs sep. acode subrec my $aaresid= $self->{aaresid}; #? keep in $ftd my $mrnaresid= $self->{mrnaresid}; #? keep in $ftd *************** *** 401,404 **** --- 488,492 ---- delete $self->{ftd}; delete $self->{evd}; + delete $self->{synt}; $ftd= $self->{ftd}= {}; #?? clear *************** *** 406,409 **** --- 494,498 ---- $mrnaresid= $self->{mrnaresid}= {}; #?? clear $evd= $self->{evd}= {}; #?? clear + $self->{synt}= {}; #?? clear $trn= $ftd->{trn}= {}; $self->{cdsloc}= ''; *************** *** 520,523 **** --- 609,613 ---- ($otag =~ /^feature$/ && $depth == 2) && do { + my $ftype= 0; if ($otype =~ /^(protein)$/ ) { #&& $depth == 2 ?! need parent.otype == mRNA # $otype= 'CDS'; *************** *** 546,551 **** } ! elsif ($otype !~ /^(exon|chromosome_arm)$/) { ! warn "MISSED feature=$otype [lev=$depth] ".$obname." \n"; # if $debug; } --- 636,647 ---- } ! elsif ($otype =~ m/^(intron|CDS)$/) { $ftype= -1; } ! ## from dpse chado reporting db: ! ## CDS = redundant w/ protein; ! ## intron = redundant w/ getlocationString('intron') ! ! elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; } ! else { ##if ($otype !~ /^(exon|chromosome_arm|ultra_scaffold)$/) # == $ignore_feature_types ! warn "MISSED feature=$otype [lev=$depth] ".$obname.','.$ob->{id}." \n"; } *************** *** 553,556 **** --- 649,708 ---- }; + ## jan05 - for dpse; see syntAcode + ( $otag =~ /^feature$/ && $depth == 1 + && $otype =~ m/^(syntenic_region|orthologous_region)$/ ) + && do { + ## has feature.feature. substructure + last SWITCH if ($ob->get('is_internal') == 1); + + my $synt= $self->{synt}; + + my $anid= $obuniqid; ##$ob->get('id'); # feature id + my $anh= $synt->{$anid}= {}; + ##unless(defined $anh) { $synt->{$anid}= $anh= {}; } + + #? my $obuniqid= $ob->{uniquename} || undef; + $anh->{NAM} = $obuniqid; + $anh->{TYPE}= $otype if ($otype); + + # my($prg,$pdb)=("synteny","dummy"); #$anh->{PROGRAM},$anh->{DATABASE}); + # warn "# synt $otag:$otype = $anid \n" if ($debug); + + ## not right ... + ##my @fts= $ob->getfeats('feature'); + ##if (@fts) { + ##my $ft= shift @fts; # 1 top level, 2 or 3 subfeature + { + my $ft= $ob; + my $dt= cleandate($ft->{timelastmodified}); + $anh->{DT} = "$dt" if $dt; + + my @fts= $ft->getfeats('feature'); # find synteny components + foreach my $dft (@fts) { + + # need spp/chr:location from each feature here ?? + my $fchr= $dft->{uniquename} || undef; #? $dft->name(); + my $floc= $ft->getlocation( $dft->{id}); + + # my($tag,$forg)= $dft->id2name('organism_id', $dft->{organism_id}); + my($tag,$forg)= $self->orgid2name($dft,'organism_id', $dft->{organism_id}); + + $floc= $fchr . ':' . $floc; + $floc= $forg . '/' . $floc if ($forg); + # $floc= $forg .'/' . $fchr . ':' . $floc; + + $anh->{mRNA} .= $floc.$RECSEP; + $anh->{mRNA_chr} .= $fchr.$RECSEP; #?? + + # my($tag,$type)= $dft->id2name('type_id',$dft->{type_id}); + # $anh->{mRNA_type} .= $type.$RECSEP if ($type); + + } + } + + $dolist= $doatts= 0; # no more recurse ?? + last SWITCH; + }; + ($otag =~ /^feature$/ && $depth == 1) && do { *************** *** 566,570 **** elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; } else { ! warn "MISSED feature=$otype [lev=$depth] ".$obname." \n"; } --- 718,722 ---- elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; } else { ! warn "MISSED feature=$otype [lev=$depth] ".$obname.','.$ob->{id}." \n"; } *************** *** 614,617 **** --- 766,772 ---- if ($aaloc) { $ob->insertlocations($aaloc, $locs); ## do utr math also + ## ^^ handles transspliced items, and also adds + ## $loca->{five_prime_UTR}= \@head; $loca->{three_prime_UTR}= \@tail; + $loc = $ob->getlocationString($aaloc); # if $aaloc; *************** *** 658,662 **** # }; ! ($otag =~ /^feature_dbxref$/ && $depth < 4) && do { last SWITCH if ($ob->get('is_internal') == 1); --- 813,817 ---- # }; ! ($otag =~ /^feature_dbxref$/ && $depth < 4) && do { last SWITCH if ($ob->get('is_internal') == 1); *************** *** 672,676 **** # warn "DEBUG: feature_dbxref=$db:$attr, $dbattr\n" if $debug; if ($depth == 1) { ! if ($iscur == 0) { $ftd->{ID2} .= $attr .$RECSEP; } else { ## patch here? to replace FBgn 2ndary ids w/ primary --- 827,833 ---- # warn "DEBUG: feature_dbxref=$db:$attr, $dbattr\n" if $debug; if ($depth == 1) { ! if ($attr =~ /^(SO|GO):/) { } ! elsif ($attr =~ /^(X|2L|2R|3L|3R|4)$/) { } ! elsif ($iscur == 0) { $ftd->{ID2} .= $attr .$RECSEP; } else { ## patch here? to replace FBgn 2ndary ids w/ primary *************** *** 682,687 **** elsif ($attr =~ /^C[GR]\d/ ) { $ftd->{CGSYM}= $attr; } elsif ($attr =~ /^TE\d/ ) { $ftd->{CGSYM} = $attr; } - elsif ($attr =~ /^(SO|GO):/) { } - elsif ($attr =~ /^(X|2L|2R|3L|3R|4)$/) { } else { $notdone=1; } } --- 839,842 ---- *************** *** 694,697 **** --- 849,854 ---- if ($notdone==1) { warn "MISSED feature_dbxref=$db:$attr\n"; # if $debug; + ## dpse chadoxml: + ## MISSED feature_dbxref=GB:AY190949 } } *************** *** 708,713 **** if (%fbgn2id && $attr =~ /FBgn/) { $attr= $fbgn2id{$attr} || $attr; } ! ## FIX for TE000 == FBti000 ! if ($iscur == 0) { $ftd->{ID2} .= $dbattr .$RECSEP; } elsif ($attr =~ /^C[GR]\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; } elsif ($attr =~ /^TE\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }#or ANID? --- 865,871 ---- if (%fbgn2id && $attr =~ /FBgn/) { $attr= $fbgn2id{$attr} || $attr; } ! if ($attr =~ /^(X|2L|2R|3L|3R|4)$/) { } ! elsif ($attr =~ /^(SO|GO):/) { } ! elsif ($iscur == 0) { $ftd->{ID2} .= $dbattr .$RECSEP; } elsif ($attr =~ /^C[GR]\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; } elsif ($attr =~ /^TE\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }#or ANID? *************** *** 715,720 **** --- 873,888 ---- elsif ($attr =~ /^(FBgn|FBti)/ && $db =~ m/FlyBase/i) { $ftd->{GID} = $attr; } elsif ($attr =~ /^GO/ && $db =~ m/GO/i) { } + elsif ($attr =~ /^FB\w\w\d+/ || $db =~ /^GB/) { + my @sym= ( $ftd->{DBX} ); + unless ( $self->checkForVal($attr,@sym) ) { $ftd->{DBX} .= $dbattr .$RECSEP; } + } else { warn "MISSED dbxref=$db,$attr,$dbattr\n"; # if $debug; + ## nov04 - add these + # MISSED dbxref=FlyBase,FBpp0074965,FlyBase:FBpp0074965 + # MISSED dbxref=FlyBase,FBtr0075204,FlyBase:FBtr0075204 + #MISSED dbxref=GB_protein,AAN11697.1,GB_protein:AAN11697.1 + #MISSED dbxref=GB,CG10160,GB:CG10160 + #MISSED dbxref=GO,GO:0008017,GO:0008017 ?? from what? -- drop #MISSED dbxref=FlyBase,CG9884,FlyBase:CG9884 -- r3.2.0-march replaces Gadfly *************** *** 758,761 **** --- 926,989 ---- + ## jan05 - need this for putative_ortholog_of relation + ($otag =~ /^feature_relationship$/ && $depth < 4) && do { + my $notdone= 1; + $dolist= $doatts= 0; # no more recurse ?? + last SWITCH if ($ob->get('is_internal') == 1); + + my ($tag,$rtype) = $ob->id2name('type_id', $ob->{type_id}); + last SWITCH if ($rtype =~ /^(partof|producedby)$/); + + my $anid= $ob->get('object_id'); + my $top = $ob->{parnode}; ## not there ##$ob->{handler}->parent_node($depth); + $top= $parob unless($top); + $top = $ob->topnode() unless($top); + my $anft= $top->getfeatbyid( $anid); + + my @fts= $ob->getfeats('feature'); # find relationship components; 1 only? + push(@fts, $anft) if $anft; + my $attr=''; + eval { + foreach my $dft (@fts) { + my($ftag,$ftype,$forg); + my $nm= $dft->name(); ## uniquename probably + ($ftag,$forg) = $self->orgid2name($dft,'organism_id', $dft->{organism_id}); + ($ftag,$ftype)= $dft->id2name('type_id', $dft->{type_id}); + $attr= "$forg/$ftype:$nm"; + + $ftd->{ORTHO} .= "$rtype=$attr".$RECSEP ; ## fixme NOT right field for this + ##$notdone= 0; + } + }; + warn "#>> feature_relationship error: $@\n" if $@; + + warn "MISSED feature_relationship=$attr id=$anid ($tag:$rtype) [lev=$depth]\n" if ($notdone); # && $debug + # _debugObj("feature_relationship",$ob); + + # MISSED feature_relationship= ,type_name,partof [lev=2] + # MISSED feature_relationship= ,type_name,producedby [lev=2] + + # chipmunk% ./chado2acode.sh tmpd/test.acode CH379060.top.xml + # ToAcode( CH379060.top.xml )..... + # IxReadSax parse <CH379060.top.xml> + # MISSED feature_relationship= ,type_name,partof [lev=1] + # MISSED feature=mRNA [lev=2] GA15475-RA + # MISSED feature_relationship= ,type_name,putative_ortholog_of [lev=1] + # MISSED feature=gene [lev=2] CG2822 + # MISSED feature_relationship= ,type_name,partof [lev=1] + # MISSED feature=mRNA [lev=2] GA21028-RA + # MISSED feature_relationship= ,type_name,putative_ortholog_of [lev=1] + # MISSED feature=gene [lev=2] CG8372 + + # CMT|comment=Range of putative ortholog from tblastn was extended + # |to cover the gene prediction + # |partof=/mRNA:GA15475-RA + # |putative_ortholog_of=/gene:CG2822 + + last SWITCH; + }; + + + ($otag =~ /^featureprop$/ && $depth<3) && do { last SWITCH if ($ob->get('is_internal') == 1); *************** *** 917,925 **** }; ($otag =~ /^alignment_evidence$/) && do { ## has feature.feature.feature. ... substructure last SWITCH if ($ob->get('is_internal') == 1); ! my $anid= $ob->get('analysis_id'); my $anh= $evd->{$anid}; --- 1145,1193 ---- }; + #--- nov04, dmel rel4 chado evidence types (prg,pdb); all type = match + ## Chado-FBan EVD PRG_DB list - Nov 04 + # egrep '^(PRG|DB)\b' fban-r4.acode | \ + # perl -ne'$p=$1 if(/PRG\|(\S+)/); if(/DB\|(\S+)/){$d=$1;$f{"${p}_${d}"}++;}\ + # END{foreach (sort keys %f){print"$_\t$f{$_}\n";}}' + # + # assembly_path 6076 + # blastx_masked_aa_SPTR.dmel 11754 + # blastx_masked_aa_SPTR.insect 5872 + # blastx_masked_aa_SPTR.othinv 7436 + # blastx_masked_aa_SPTR.othvert 5738 + # blastx_masked_aa_SPTR.plant 3649 + # blastx_masked_aa_SPTR.primate 6578 + # blastx_masked_aa_SPTR.rodent 6600 + # blastx_masked_aa_SPTR.worm 5376 + # blastx_masked_aa_SPTR.yeast 2509 + # genie_masked_dummy 9008 + # genscan_masked_dummy 11360 + # promoter_dummy 3542 + # repeat_runner_seg_dummy 1007 + # repeatmasker_dummy 1377 + # sim4_na_ARGs.dros 740 + # sim4_na_ARGsCDS.dros 716 + # sim4_na_DGC.in_process.dros 2351 == EST/cDNA + # sim4_na_HDP_RNAi.dmel 50 + # sim4_na_HDP_mRNA.dmel 66 + # sim4_na_dbEST.diff.dmel 7139 == EST/cDNA + # sim4_na_dbEST.same.dmel 8100 == EST/cDNA + # sim4_na_gadfly.dros.RELEASE2 11076 + # sim4_na_gb.dmel 8236 + # sim4_na_gb.tpa.dmel 236 + # sim4_na_smallRNA.dros 49 + # sim4_na_transcript.dmel.RELEASE31 11710 + # sim4_na_transcript.dmel.RELEASE32 11824 + # sim4tandem_na_gb.dmel 6700 + # tRNAscan-SE_dummy 265 + # tblastx_masked_na_dbEST.insect 5412 + # tblastxwrap_masked_na_baylorf1_scfchunk.dpse 11480 + # tblastxwrap_masked_na_scf_chunk_agambiae.fa 8695 + ($otag =~ /^alignment_evidence$/) && do { ## has feature.feature.feature. ... substructure last SWITCH if ($ob->get('is_internal') == 1); ! my $anid= $ob->get('analysis_id'); my $anh= $evd->{$anid}; *************** *** 932,936 **** $anft= $top->getfeatbyid( $anid); } - if ($anft) { $anh->{PROGRAM}= $anft->{program}; # program=blastx_masked --- 1200,1203 ---- *************** *** 940,949 **** $anh->{PROGRAM}= $aname; } # warn "# evd $otag: $anid = $anh->{PROGRAM} \n" if ($debug); } $dolist= $doatts= 0; # no more recurse ?? last SWITCH ! if ($anh->{PROGRAM} eq 'locator' && $anh->{DATABASE} eq 'cytology'); my @fts= $ob->getfeats('feature'); --- 1207,1219 ---- $anh->{PROGRAM}= $aname; } + # warn "# evd $otag: $anid = $anh->{PROGRAM} \n" if ($debug); } + my($prg,$pdb)=($anh->{PROGRAM},$anh->{DATABASE}); + $dolist= $doatts= 0; # no more recurse ?? last SWITCH ! if ($prg eq 'locator' && $pdb eq 'cytology'); my @fts= $ob->getfeats('feature'); *************** *** 961,964 **** --- 1231,1239 ---- next if ($dft->{id} eq $ftd->{srcid}); + my($tag,$type)= $dft->id2name('type_id',$dft->{type_id}); + $anh->{TYPE}= $type if ($type); + ## jun04 - need to filter out apollo dupl. evidence for + ## type = transposable_element_insertion_site + my $dbname= $dft->name(); my $dbx= $dbname; # default *************** *** 984,999 **** # $anh->{mRNA_dbx2} .= $dbname .';' ; ! my($tag,$type)= $dft->id2name('type_id',$dft->{type_id}); ! $anh->{TYPE}= $type if ($type); ! ! ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros ! if ($type eq 'EST' && $anh->{DATABASE} =~ /\.dros/) { my $v= $dbname; $v =~ s/[:_\.].*//; $ftd->{EST} .= $v .$RECSEP unless($ftd->{EST} =~ m/$v$RECSEP/); } ! elsif ($type eq 'cDNA_clone') { ! my $v= $dbname; $ftd->{CDNA} .= $v .$RECSEP unless($ftd->{CDNA} =~ m/$v$RECSEP/); } elsif ($type eq 'oligonucleotide') { my $v= $dbname; $v =~ s/_at_\d+/_at/; --- 1259,1293 ---- # $anh->{mRNA_dbx2} .= $dbname .';' ; ! ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros ! # dmel r4 -- need to recode blast PRG/DB types to get EST/cDNA ! # sim4_na_EST.all_nr.dros=EST -- old ! # sim4_na_dbEST.diff.dmel=EST ! # sim4_na_dbEST.same.dmel=EST ! # sim4_na_cDNA.dros=cDNA -- old ! # sim4_na_adh.cDNAs.dros=cDNA -- old ! # sim4_na_DGC.dros=DGC ! # sim4_na_DGC.in_process.dros=DGC ! ! if ($prg =~ m/(sim4|groupest)/ && $type =~ m/(match|alignment)/) { #? other types? ! if ($pdb=~/(EST)/) { $type='EST'; } ! elsif ($pdb=~/(cDNA)/) { $type='cDNA_clone'; } ! elsif ($pdb=~/(DGC)/) { $type='cDNA_clone'; } # or separate to DGC field? ! #? change acode type? ! # $anh->{TYPE}= $type if ($type); ! } ! ! ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros ! if ($type eq 'EST' && $pdb =~ /\.(dmel|dros)/) { my $v= $dbname; $v =~ s/[:_\.].*//; $ftd->{EST} .= $v .$RECSEP unless($ftd->{EST} =~ m/$v$RECSEP/); } ! elsif ($type =~ /cDNA/) { ! my $v= $dbname; $v =~ s/[:_\.].*//; $ftd->{CDNA} .= $v .$RECSEP unless($ftd->{CDNA} =~ m/$v$RECSEP/); } + elsif ($type =~ /DGC/) { + my $v= $dbname; $v =~ s/[:_\.].*//; + $ftd->{DGC} .= $v .$RECSEP unless($ftd->{DGC} =~ m/$v$RECSEP/); + } elsif ($type eq 'oligonucleotide') { my $v= $dbname; $v =~ s/_at_\d+/_at/; *************** *** 1016,1030 **** --- 1310,1328 ---- ## includes dbxref , featureprop .. ? if ( $doatts && defined $ob->{attrlist} ) { + my $lastpar= $parob; $parob= $ob; foreach (@{$ob->{attrlist}}) { # $_->handleObj(1+$depth, $self); $self->handleObj(1+$depth, $_); } + $parob= $lastpar; } if ($dolist && defined $ob->{list} ) { + my $lastpar= $parob; $parob= $ob; foreach (@{$ob->{list}}) { # $_->handleObj(1+$depth, $self); $self->handleObj(1+$depth, $_); } + $parob= $lastpar; } ## recursion end ...... *************** *** 1033,1046 **** { my @elist= values( % $evd ); $self->{defline}=''; ! my $outh= $self->{outh}; ! my $ftout= $self->{ftout}; $ftd->{VERS}= $self->{VERS}; # data version from where? ! $self->fbanAcode( $outh, $ftd, $ftout,\@elist); ##, $FTOUT, \%ca); my $chr= $ftd->{ARM}; my $defline= $self->{defline}; # fbanAcode() makes this my $type='protein'; # $ftd->{TYPE} || << not 'gene' ! $outh= $self->{faout}; my $resid= $aaresid; if (scalar %$resid && $outh) { --- 1331,1345 ---- { my @elist= values( % $evd ); + my @synt = values( % {$self->{synt}} ); $self->{defline}=''; ! ##my $outh= $self->{outh}; ! ##my $ftout= $self->{ftout}; $ftd->{VERS}= $self->{VERS}; # data version from where? ! $self->fbanAcode1( $ftd, \@elist, \@synt); my $chr= $ftd->{ARM}; my $defline= $self->{defline}; # fbanAcode() makes this my $type='protein'; # $ftd->{TYPE} || << not 'gene' ! my $outh= $self->{faout}; my $resid= $aaresid; if (scalar %$resid && $outh) { *************** *** 1127,1131 **** sub fbanAcode { my $self = shift; ! my($outh, $ftd, $ftout, $ca)= @_; # array refs of keys, values my %h= %$ftd; --- 1426,1443 ---- sub fbanAcode { my $self = shift; ! my($outh, $ftd, $ftout, $evd)= @_; # array refs of keys, values ! $self->{outh}= $outh; ! $self->{ftout}= $ftout; ! return $self->fbanAcode1($ftd, $evd); ! } ! ! sub fbanAcode1 { ! my $self = shift; ! my( $ftd, $evd, $synt)= @_; # array refs of keys, values ! ! my $outh= $self->{outh}; ! my $outteh= $self->{outteh}; ! my $ftout= $self->{ftout}; ! my %h= %$ftd; *************** *** 1167,1170 **** --- 1479,1485 ---- BLOC|17099533..17099534 } + + FIXME - aug04, need to get rid of TE/FBti -> FBan + FBTI_IDBASE hack + .. create separate FBan-te.acode file? index by fbti or TE(num) =cut *************** *** 1172,1176 **** if ($uniqid !~ /FBan\d/) { if ($uniqid =~ m/^C[GR]0*(\d+)/) { $uniqid= sprintf("FBan%07d",$1); } ! elsif ($uniqid =~ m/^TE0*(\d+)/) { $uniqid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); } } if ($uniqid ne $anid) { push(@id2,$anid) if $anid; $anid= $uniqid; } --- 1487,1494 ---- if ($uniqid !~ /FBan\d/) { if ($uniqid =~ m/^C[GR]0*(\d+)/) { $uniqid= sprintf("FBan%07d",$1); } ! elsif ($uniqid =~ m/^TE0*(\d+)/) { ! $uniqid= sprintf("FBti%07d", $1 ); $outh= $self->{outteh}; ! ##was ## $uniqid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); ! } } if ($uniqid ne $anid) { push(@id2,$anid) if $anid; $anid= $uniqid; } *************** *** 1181,1195 **** ## no - either put TE into separate FBan-te.acode, or revise FBanID num if ($gid !~ /FB\w\w\d+/) { $gid = $cgsym; $gid =~ s/TE/FBti/; } ! if ($anid !~ /FBan\d/) { ! if ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); } ! elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); } } - # $anid = $cgsym; - # if ($cgsym =~ m/TE0*(\d+)/) { ##? is TE/FBti id distinct from FBan id num range? NO - # $anid = $1; - # $anid = sprintf("FBan%07d",( FBTI_IDBASE + $anid)); ## this is bad; conflicts with CGnn/FBan - # } - $cgsym= $gsym; $gensr = "INSR\n{\n"; #?? INSR = TIR subrec ? will this TIRecord cause problems? --- 1499,1514 ---- ## no - either put TE into separate FBan-te.acode, or revise FBanID num if ($gid !~ /FB\w\w\d+/) { $gid = $cgsym; $gid =~ s/TE/FBti/; } ! ! if ($anid !~ /FBti\d/) { ! if ($gid =~ m/^FBti0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); } ! elsif ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); } ! elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); } ! $outh= $self->{outteh}; # put in separate acode file } + # if ($anid !~ /FBan\d/) { + # if ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); } + # elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); } + # } $cgsym= $gsym; $gensr = "INSR\n{\n"; #?? INSR = TIR subrec ? will this TIRecord cause problems? *************** *** 1326,1341 **** print $outh "SQLEN|$sqlen\n" if $sqlen; ##$h{SQLEN}; print $outh "GO|$go\n" if $go; - print $outh "SYN|".join("\n|",split(/$RECSEP/,$h{SYN}))."\n" if $h{SYN}; push( @id2, split(/$RECSEP/,$h{ID2})) if $h{ID2}; print $outh "ID2|".join("\n|",@id2)."\n" if @id2; ! print $outh "CDNA|".join("\n|",split(/$RECSEP/,$h{CDNA}))."\n" if $h{CDNA}; ! print $outh "EST|".join("\n|",split(/$RECSEP/,$h{EST}))."\n" if $h{EST}; ! print $outh "AFFY|".join("\n|",split(/$RECSEP/,$h{AFFY}))."\n" if $h{AFFY}; - #?? split these, others on /;/ ?? - print $outh "PEPST|".join("\n|",split(/$RECSEP/,$h{PEPST}))."\n" if $h{PEPST}; - my $gcm = join("\n|", split(/$RECSEP/,$h{CMT})); $gcm =~ s/^\s*//; --- 1645,1656 ---- print $outh "SQLEN|$sqlen\n" if $sqlen; ##$h{SQLEN}; print $outh "GO|$go\n" if $go; push( @id2, split(/$RECSEP/,$h{ID2})) if $h{ID2}; print $outh "ID2|".join("\n|",@id2)."\n" if @id2; ! foreach my $dkey (qw(SYN ORTHO EST CDNA DGC AFFY PEPST)) { ! print $outh "$dkey|".join("\n|",split(/$RECSEP/,$h{$dkey}))."\n" if $h{$dkey}; ! } my $gcm = join("\n|", split(/$RECSEP/,$h{CMT})); $gcm =~ s/^\s*//; *************** *** 1440,1450 **** } ! if (ref($ca) =~ /ARRAY/) { $self->evidAcode( $outh, $ca, $ftout, $arm ); } ! # if ($ca && $ca->{$h{CGSYM}}){ evidAcode( $outh, $ca->{$h{CGSYM}} ); } my %did= map { $_,1; } qw/ANID ID ID2 GID GSYM GNLEN ARM CLOC CGSYM SCAF BLOC mRNA CTSYM ! DT AALEN AANAM CDS SQLEN GO CLOCC PEPST PEPCMT CMT SYN ! AFFY EST CDNA TYPE ENAM srcid residues trns trn aaresid mrnaresid/; foreach my $k (sort keys %h) { if($k =~ /\w/ && !$did{$k}) { print $outh "$k|$h{$k}\n"; } --- 1755,1768 ---- } ! if (ref($evd) =~ /ARRAY/) { $self->evidAcode( $outh, $evd, $ftout, $arm ); } ! # if ($evd && $evd->{$h{CGSYM}}){ evidAcode( $outh, $evd->{$h{CGSYM}} ); } ! ! if (ref($synt) =~ /ARRAY/) { $self->syntAcode( $outh, $synt, $ftout, $arm ); } my %did= map { $_,1; } qw/ANID ID ID2 GID GSYM GNLEN ARM CLOC CGSYM SCAF BLOC mRNA CTSYM ! DT AALEN AANAM CDS SQLEN GO CLOCC PEPCMT CMT ! SYN ORTHO EST CDNA DGC AFFY PEPST ! TYPE ENAM srcid residues trns trn aaresid mrnaresid/; foreach my $k (sort keys %h) { if($k =~ /\w/ && !$did{$k}) { print $outh "$k|$h{$k}\n"; } *************** *** 1456,1459 **** --- 1774,1820 ---- + ## jan05 add + # SYNT + # { + # CLA|syntenic_region + # NAM|syntenic_block:240 + # BLOC|dmel/2L:3458904..3859790 + # BLOC|dpse/4_group3:4830267..5271058 + # } + + sub syntAcode { + my $self = shift; + my($outh, $eref, $ftout, $arm)= @_; + my @kv= @$eref; ## array of evd hash + + foreach my $v (@kv) { + my %h= %$v; + my $earm= $h{ARM} || $arm; + my $type= $h{TYPE}; + my $name= $h{NAM}; + + print $outh "SYNT\n{\n"; + print $outh "NAM|$name\n"; + print $outh "CLA|$type\n" if $type; + print $outh "DT|$h{DT}\n" if $h{DT}; + + my @bl= split(/$RECSEP/,$h{mRNA}); # many of these per program/db/type + my @chrs= split(/$RECSEP/,$h{mRNA_chr}); + + foreach my $bl (@bl) { + my $id= ''; + my $chr= shift @chrs; # should match mRNA list + my $blwrap = wrapLong($bl); #>> "$spp/$chr:$bl" + + print $outh "BLOC|$blwrap\n" if $blwrap; + + ## only printFeature for this organism ! + # next unless($chr eq $earm); #? + # $self->printFeature($ftout, $earm, $type, $name, '', $bl, $id, '') + # if ($bl && $ftout && !$nofeat); + } + print $outh "}\n"; + } + } sub evidAcode { *************** *** 1607,1611 **** open(AA,$aaf) or die "$aaf"; open(IDX,">$aaf.idx") or die ">$aaf.idx"; ! open(IDX2,">$aaf.$id2tag.idx") or die ">$aaf.$id2tag.idx"; my ($idnum, $id2num, $at0, $at, $start, $end)= (0,0,0,0,0,0); while(<AA>) { --- 1968,1972 ---- open(AA,$aaf) or die "$aaf"; open(IDX,">$aaf.idx") or die ">$aaf.idx"; ! if ($id2tag) { open(IDX2,">$aaf.$id2tag.idx") or die ">$aaf.$id2tag.idx"; } my ($idnum, $id2num, $at0, $at, $start, $end)= (0,0,0,0,0,0); while(<AA>) { *************** *** 1623,1627 **** $idnum= $1; } ! if (/^ID\|${id2tag}0*(\d+)/) { $id2num= $1; } --- 1984,1988 ---- $idnum= $1; } ! if ($id2tag && /^ID\|${id2tag}0*(\d+)/) { $id2num= $1; } *************** *** 1630,1634 **** putIndex(*IDX,$start,$end,$idnum) if ($idnum>0); putIndex(*IDX2,$start,$end,$id2num) if ($id2num>0); ! close(AA); close(IDX);close(IDX2); warn "# indexAcode done\n" if $debug; } --- 1991,1997 ---- putIndex(*IDX,$start,$end,$idnum) if ($idnum>0); putIndex(*IDX2,$start,$end,$id2num) if ($id2num>0); ! close(AA); ! close(IDX); ! close(IDX2) if ($id2tag); warn "# indexAcode done\n" if $debug; } |