[Gmod-schema-cmts] schema/XMLTools/ChadoSax/src/org/gmod/chado/ix IxReadSax.pm,1.4,1.5 ToAcode.pm,1.

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5226/src/org/gmod/chado/ix

Modified Files:
	IxReadSax.pm ToAcode.pm 
Log Message:
dpse/synteny updates

Index: IxReadSax.pm
===================================================================
RCS file: /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix/IxReadSax.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** IxReadSax.pm	19 Apr 2004 21:23:16 -0000	1.4
--- IxReadSax.pm	8 Feb 2005 20:03:59 -0000	1.5
***************
*** 49,52 ****
--- 49,53 ----
  rearranged to make them a bit more usable objects.  

+   
  =head1 SEE ALSO

***************
*** 75,87 ****
  use XML::Parser::PerlSAX;
  use Exporter;
! use vars qw/$VERSION $ROOT_NODE $debug @skipKeys @ISA @EXPORT /;
  @ISA = qw(Exporter);
  @EXPORT = qw(&view);

! $VERSION = "0.5";
  $ROOT_NODE='chado';
  $debug= 0;

  BEGIN {
    # some of these are useful - later // timelastmodified 
    # dec03 - drop residues  from skipset - let user decide w/ -skip opt
--- 76,94 ----
  use XML::Parser::PerlSAX;
  use Exporter;
! use vars qw/$VERSION $ROOT_NODE $debug @skipKeys $featmainpatt $featgenpatt @ISA @EXPORT /;
  @ISA = qw(Exporter);
  @EXPORT = qw(&view);

! $VERSION = "0.6";
  $ROOT_NODE='chado';
  $debug= 0;

  BEGIN {
+   $featmainpatt='feature|feature_evidence|prediction_evidence|alignment_evidence';
+   $featgenpatt='analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism';
+   ## jan05 added feature_relationship
+   # 	/^(analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism |feature_relationship)$/ &&  do {  
+   # analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism
+   
    # some of these are useful - later // timelastmodified 
    # dec03 - drop residues  from skipset - let user decide w/ -skip opt
***************
*** 98,104 ****
      is_fmin_partial is_fmax_partial
      evidence_id
-     organism_id 
      _appdata
      /;
  }

--- 105,111 ----
      is_fmin_partial is_fmax_partial
      evidence_id
      _appdata
      /;
+     # organism_id
  }

***************
*** 256,259 ****
--- 263,273 ----
  }

+ sub parent_node {
+ 	my $self= shift;
+ 	my $upto= shift; $upto= -1 unless(defined $upto);
+   my $parft= @{$self->{featstack}}[$upto]; 
+   return $parft;
+ }
+ 
  sub getTopNode; *getTopNode = \&topnode; # java alias

***************
*** 287,295 ****
    added fmin, fmax replacements for nbeg, nend
    need to catch these containers:
!      feature_synonym feature_relationship  feature_dbxref ? feature_cvterm ?  

       feature_synonym > should become an attribute of parent feature ?
         (encloses synonym, synonym_id tags)

  =item XML handlers

--- 301,319 ----
    added fmin, fmax replacements for nbeg, nend
    need to catch these containers:
!      feature_synonym  feature_dbxref ? feature_cvterm ?  

       feature_synonym > should become an attribute of parent feature ?
         (encloses synonym, synonym_id tags)

+ JAN 05: 
+   for synteny features, handle 'gene_syntenic_block' like 
+     prediction_evidence alignment_evidence ?
+     
+   need  feature_relationship for cv type_id = putative_ortholog_of
+     with data in object_id/feature/
+     feature_relationship/
+       object_id/feature/ organism=xxx, cvterm=gene, uniquename=geneID
+       type_id/ name=putative_ortholog_of
+       
  =item XML handlers

***************
*** 342,346 ****
      $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; 

!     ## test sep03
      if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) {
        $self->{handleObj}->handleObj(0, $ft, $err);
--- 366,370 ----
      $self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; 

!     ##  sep03
      if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) {
        $self->{handleObj}->handleObj(0, $ft, $err);
***************
*** 370,375 ****
     feature feature_evidence prediction_evidence alignment_evidence
     analysis feature_cvterm cvterm cv db pub 
!    synonym organism
!    dbxref featureprop feature_synonym feature_dbxref
     );

--- 394,399 ----
     feature feature_evidence prediction_evidence alignment_evidence
     analysis feature_cvterm cvterm cv db pub 
!    synonym organism feature_relationship
!    dbxref featureprop feature_synonym feature_dbxref 
     );

***************
*** 388,396 ****
    $self->{elcount}{$_}++; # debug only
    return if ($self->{skipkids});
! 
    my $nada= 0;
    SWITCH: {

! 		/^($ROOT_NODE|feature|feature_evidence|prediction_evidence|alignment_evidence|analysis|feature_cvterm|cvterm|cv|db|pub|synonym|organism)$/ &&  do { 

        my $atid= $element->{Attributes}->{id}; 
--- 412,420 ----
    $self->{elcount}{$_}++; # debug only
    return if ($self->{skipkids});
!   
    my $nada= 0;
    SWITCH: {

! 		/^($ROOT_NODE|$featmainpatt|$featgenpatt|feature_relationship)$/ &&  do { #

        my $atid= $element->{Attributes}->{id}; 
***************
*** 400,410 ****
  		  if (/$ROOT_NODE/ && $self->{$ROOT_NODE}) {
         $ft= $self->{$ROOT_NODE};
-         # $self->{idhash}{$atid}= $self->{$ROOT_NODE} if $atid;  
- 		    # last SWITCH;
  		    }
        else {
          $ft= new org::gmod::chado::ix::IxFeat( 
                 tag => $_, id => $atid, handler => $self);
!          }

        $self->{$_}= $ft;
--- 424,432 ----
  		  if (/$ROOT_NODE/ && $self->{$ROOT_NODE}) {
         $ft= $self->{$ROOT_NODE};
  		    }
        else {
          $ft= new org::gmod::chado::ix::IxFeat( 
                 tag => $_, id => $atid, handler => $self);
!         }

        $self->{$_}= $ft;
***************
*** 416,420 ****

          # these are main feature objects...
!       if (/^($ROOT_NODE|feature|feature_evidence|prediction_evidence|alignment_evidence)$/) { #|analysis
          $self->{curfeat}= $ft;
          push( @{$self->{featstack}}, $ft);
--- 438,442 ----

          # these are main feature objects...
!       if (/^($ROOT_NODE|$featmainpatt)$/) { #|analysis
          $self->{curfeat}= $ft;
          push( @{$self->{featstack}}, $ft);
***************
*** 424,427 ****
--- 446,452 ----
         $self->{srcfeature_id}= $atid; # save 
         }
+       if (/^feature$/ && $elpar eq 'object_id') { #?? for odd feature_relationship
+        $self->{object_id}= $atid; # save 
+        }

  			last SWITCH; 
***************
*** 433,445 ****
  			};  

- #		# handle cv_id ref to cv - has id attr if cv/cvname is defined
- # 		/^(cv_id)$/ &&  do { 
- #       my $atid= $element->{Attributes}->{id}; 
- #       if ($atid) {
- #         # my $cv= $self->{cvlist}{$atid}; #??
- #         # my $cvset= $cv->get('cvname'); - need to prefix cvterm/name w/ cvset?
- #         }
- # 			last SWITCH; 
- # 			};  

      /^(featureloc)$/ &&  do { 
--- 458,461 ----
***************
*** 459,462 ****
--- 475,479 ----
  		  ## feature_cvterm is handled ok by nested cvterm/dbxref == GO terms + ids
  		  ## or handle like featureprop ?
+ 		  ## organism| was here.. see above genfeat

  		/^(dbxref|featureprop|feature_synonym|feature_dbxref)$/ &&  do { 
***************
*** 510,517 ****
   		/^(type_id)$/ &&  do {  
   	  # //CAN BE INSIDE  pub OR feature OR feature_relationship OR synonym
! 		  if ($hasval ) { # && $val ne 'contains'
! 		    if ($elpar eq 'feature_relationship') {
! 		      ## skip
  		      }
  		    elsif ($elpar =~ /^(dbxref|featureprop|feature_synonym|feature_dbxref)$/) {
  		      ## attributes now use type_id
--- 527,544 ----
   		/^(type_id)$/ &&  do {  
   	  # //CAN BE INSIDE  pub OR feature OR feature_relationship OR synonym
! 		  if ( $hasval ) { # && $val ne 'contains'
! 		  
! 		    ##if ($elpar eq 'feature_relationship') {
! 		    if ($self->{curgenfeat}->{tag} =~ /(feature_relationship)/) {
! 		      ## skip? jan05 : need some of these
!  		      ##$self->{$elpar}->set( $_ => $val ) ;
!  		      #if ( $self->{curgenfeat}->get('object_id') ) {
!  		      $self->{curgenfeat}->set( $_ => $val ) ;
!  		      #}
  		      }
+ # 		    elsif ($elpar =~ /^(feature_relationship)$/) {
+ # 		      ## skip? jan05 : need some of these
+ # 		      }
+ 		      
  		    elsif ($elpar =~ /^(dbxref|featureprop|feature_synonym|feature_dbxref)$/) {
  		      ## attributes now use type_id
***************
*** 536,541 ****
   		  last SWITCH; };  

!  		/^(name|uniquename|miniref|program|sourcename|programversion|seqlen|cvterm_id|analysis_id|timelastmodified)$/ &&  do {  
   		    ## general feature values - should be only 1/feature
  		  if ($hasval) {
  		    if ($self->{curgenfeat}) {
--- 563,569 ----
   		  last SWITCH; };  

!  		/^(name|uniquename|miniref|program|sourcename|programversion|seqlen|cvterm_id|object_id|analysis_id|timelastmodified)$/ &&  do {  
   		    ## general feature values - should be only 1/feature
+  		  unless( $hasval ) { $val= $self->{$_}; $hasval= ($val =~ /\S/);  }
  		  if ($hasval) {
  		    if ($self->{curgenfeat}) {
***************
*** 566,569 ****
--- 594,602 ----
   		  last SWITCH; };  

+ 		/^(genus|species)$/ &&  do {  
+ 		  if ($hasval && $self->{organism}) {
+  		    $self->{organism}->set( $_ => $val );
+  		    }
+  		  last SWITCH; };  

  ## 		//PUTTING PARAMETERS INTO OBJECTS
***************
*** 572,576 ****
        # synonym not top level...

!  		/^(organism|pub|cvterm|cv|db|analysis|feature_cvterm|synonym)$/ &&  do {  
        ## // top level chado params
   		  ## here, if cvterm and cv_id has id attrib, add link to cv?
--- 605,609 ----
        # synonym not top level...

!  		/^($featgenpatt|feature_relationship)$/ &&  do {  
        ## // top level chado params
   		  ## here, if cvterm and cv_id has id attrib, add link to cv?
***************
*** 585,590 ****
  			  $self->{vals}{$elpar}= $ft->{id};
  			  }
! 	
! 			if (/(synonym|feature_cvterm)$/) {
          $self->{curfeat}->add($ft);
  			  }
--- 618,628 ----
  			  $self->{vals}{$elpar}= $ft->{id};
  			  }
!       
!       if (/(feature_relationship)$/) { #
!         my ($tag,$rtype) = $self->{$ROOT_NODE}->id2name('type_id', $ft->{type_id});
!         $self->{curfeat}->add($ft)
!           if (scalar(%$ft) && $rtype !~ /^(partof|producedby)$/);
! 			  }	
! 			elsif (/(synonym|feature_cvterm)$/) { 
          $self->{curfeat}->add($ft);
  			  }
***************
*** 599,605 ****
   		  last SWITCH; };  

!  		/^(feature|feature_evidence|prediction_evidence|alignment_evidence)$/ &&  do {  #|analysis 
        my $ft= pop( @{$self->{featstack}});
        my $parft= @{$self->{featstack}}[-1]; 
        $parft->add($ft);
  			$self->{$_}= undef;
--- 637,644 ----
   		  last SWITCH; };  

!  		/^($featmainpatt)$/ &&  do {  #|analysis 
        my $ft= pop( @{$self->{featstack}});
        my $parft= @{$self->{featstack}}[-1]; 
+       $ft->{parnode}= $parft; #??
        $parft->add($ft);
  			$self->{$_}= undef;
***************
*** 609,613 ****
  			$self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; 

! 			## test sep03
  			if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) {
  			  $self->{handleObj}->handleObj(0, $ft);
--- 648,652 ----
  			$self->{curgenfeat}= @{$self->{genfeatstack}}[-1]; 

! 			## sep03
  			if ( $self->{handleObj} && $parft == $self->{$ROOT_NODE} ) {
  			  $self->{handleObj}->handleObj(0, $ft);
***************
*** 655,683 ****
   		  last SWITCH; }; 

- ##		//ATTRIB
- 
- #  		/^(feature_dbxref)$/ &&  do {  
- # 			$self->{curfeat}->addattr($self->{$_});
- #       $self->{$_}= undef;
- #  		  last SWITCH; };  
- 
- #  		/^(dbxref_id)$/ &&  do {  
- #  		  my $attr= $self->{$_};
- #  		  $attr->setattr( $val) if $hasval; #? and not get() ?
- #  		  
- # #  		  my $fattr= $self->{feature_dbxref};
- # #  		  if ($fattr) {
- # #  		    $fattr->set( attr => $attr);
- # # 			  #	//THIS dbxref_id IS INSIDE A feature_dbxref
- # # 			  #	//IT IS NOW SUBSUMED
- # #  		    }
- # #  		  elsif
- #  		  if ( $self->{curfeat} ) {
- # #				//ADD TO FEATURE
- #  			  $self->{curfeat}->addattr($attr);
- #  		    }
- # 	    $self->{$_}= undef;
- #  		  last SWITCH; };  
- 
   		/^(dbxref)$/ &&  do {  
        # now also in attrstack
--- 694,697 ----
***************
*** 696,706 ****

        $self->setAttrs($attr, qw(is_current is_internal is_analysis));
- #  		  my $is_current= $self->{is_current};
- #  		  $attr->set( is_current => $is_current) if defined $is_current;  
- #       $self->{is_current}= undef;

- #  		  my $dbxattr= $self->{dbxref_id};
- #   		if ($dbxattr) { $dbxattr->setattr( $attr ); }
- #  		  elsif
   		  if ( $self->{curgenfeat} ) {
   			  $self->{curgenfeat}->addattr($attr);
--- 710,714 ----
***************
*** 719,722 ****
--- 727,731 ----
  	    $self->{$_}= undef;
        last SWITCH; };  
+ 

  		/^(feature_synonym)$/ &&  do {  ## or synonym -- done by synonym == par->add(ft)
***************
*** 727,745 ****

   		  ## synonym_id always seems to enclose only synonym struct 
- # # 		  my $synonym_id= $self->{synonym_id};
- #  		  $attr->set( synonym_id => $synonym_id) if $synonym_id;  
- #       $self->{synonym_id}= undef;
- #       ##?? change id=feature_synonym_1894 to id=synonym_1894 
- #       
  #       ## synonym struct is: id,name,type_id
- # #  	    my $synonym= $self->{synonym};
- # #  		  $attr->set( synonym => $synonym) if $synonym;  
- # 
- #  		  my $pub_id= $self->{pub_id};
- #  		  $attr->set( pub_id => $pub_id) if $pub_id;  
- # 
- #  		  my $is_current= $self->{is_current};
- #  		  $attr->set( is_current => $is_current) if defined $is_current;  
- #       $self->{is_current}= undef;

   		  if ( $self->{curgenfeat} ) { # was curfeat
--- 736,740 ----

Index: ToAcode.pm
===================================================================
RCS file: /cvsroot/gmod/schema/XMLTools/ChadoSax/src/org/gmod/chado/ix/ToAcode.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** ToAcode.pm	19 Apr 2004 21:23:16 -0000	1.4
--- ToAcode.pm	8 Feb 2005 20:04:17 -0000	1.5
***************
*** 82,85 ****
--- 82,88 ----
  use Getopt::Long;    
  use constant IDXRECSIZE => length(pack("NN", 1, 50000)); # store as unsigned long, unsigned long
+ 
+ 
+ ## FBTI_IDBASE is no longer valid .. aug04
  use constant FBTI_IDBASE => 50000; ## FIXME - quick fix for TE/FBti id in FBan form

***************
*** 112,115 ****
--- 115,123 ----
    $GeneDbName= 'FlyBase:';

+ #   my $sppabbrev={
+ #     Drosophila_pseudoobscura => 'dpse',
+ #     Drosophila_melanogaster => 'dmel',
+ #   };
+   
    @transcript_types= qw( \w+RNA protein pseudogene );
    @non_transcript_types = qw(DNA_motif aberration_junction enhancer insertion_site
***************
*** 117,121 ****
    rescue_fragment sequence_variant
    );
!   @ignore_feature_types = qw(exon chromosome_arm);

    $transcript_types= join('|', @transcript_types);
--- 125,129 ----
    rescue_fragment sequence_variant
    );
!   @ignore_feature_types = qw(exon chromosome_arm ultra_scaffold golden_path_region);

    $transcript_types= join('|', @transcript_types);
***************
*** 152,164 ****

  sub acode {
! 
!   my $outfh= *STDOUT;
!   my $ftout= undef;
!   my $faout= undef; my $fnout= undef;
!   my $outf= undef;
!   my $ftoutf= undef;
    my $doindex= 0;
    my @skipf=();
-   my $fbgn2id= undef;

    my $optok= Getopt::Long::GetOptions( 
--- 160,170 ----

  sub acode {
!   
!   # file names
!   my ($outf, $outte,  $ftout, $fbgn2id ); 
!   # file 'handles'
!   my ($outfh, $outteh, $ftoutf, $faout, $fnout); 
    my $doindex= 0;
    my @skipf=();

    my $optok= Getopt::Long::GetOptions( 
***************
*** 166,169 ****
--- 172,176 ----
      'index!' => \$doindex,
      'outfile=s' => \$outf,
+     'tefile=s' => \$outte,
      'featfile=s' => \$ftoutf,
      'fbgn2id=s' => \$fbgn2id,
***************
*** 205,208 ****
--- 212,216 ----
      }

+   $outfh= *STDOUT;
    if ($outf) {
      # if (-r $outf) -- what ? append?
***************
*** 223,229 ****
      $fnout= *MRNAF;
      }
!       
    my $outhand= new org::gmod::chado::ix::ToAcode(
      outh => $outfh,
      ftout => $ftout,
      faout => $faout,
--- 231,249 ----
      $fnout= *MRNAF;
      }
! 
!   unless ($outte) {
!     if ($outf) { ($outte= $outf) =~ s/\./-TE\./; }
!     # transposable_element have separate ID class - FBti
!     elsif ($ftoutf) { $outte= $ftoutf;  $outte =~ s/\.[\.\w]*$/-TE\.acode/; }
!     }
!   if ($outte) {
!     open(OUTTE,">>$outte") or die "error writing $outte";
!     $outteh= *OUTTE;
!     }
!   else { $outteh= *STDOUT;}
!   
    my $outhand= new org::gmod::chado::ix::ToAcode(
      outh => $outfh,
+     outteh => $outteh,
      ftout => $ftout,
      faout => $faout,
***************
*** 242,245 ****
--- 262,266 ----

    close($outfh) if ($outf);
+   close($outteh) if ($outteh);
    close($ftout) if ($ftout);
    close($faout) if $faout;
***************
*** 247,252 ****

    if ($doindex && $outf) {
!     @ARGV= ($outf);
!     acodeindex();
      }  
  }
--- 268,275 ----

    if ($doindex && $outf) {
!     indexAcode( $outf, "FBan", "FBgn");
!     }  
!   if ($doindex && $outte) {
!     indexAcode( $outte, "FBti");
      }  
  }
***************
*** 267,270 ****
--- 290,297 ----
    unless(-r $file) { die "acodeindex: cannot index $file\n"; }
    indexAcode( $file, "FBan", "FBgn");
+   
+ ## dang FBti problem
+   (my $outte= $file) =~ s/\./-TE\./;  # transposable_element have separate ID class - FBti
+   if (-e $outte) { indexAcode( $outte, "FBti"); }
  }

***************
*** 290,293 ****
--- 317,321 ----
  	$self->{tag}= 'ToAcode' unless (exists $self->{tag} );
  	$self->{outh}= *STDOUT unless (exists $self->{outh} );
+ 	$self->{outteh}= *STDOUT unless (exists $self->{outteh} );
  	$self->{VERS}= $DATA_VERSION unless (exists $self->{VERS} );
  	$self->{ftd}= {};
***************
*** 295,298 ****
--- 323,327 ----
  	$self->{mrnaresid}= {};
  	$self->{evd}= {};
+ 	$self->{synt}= {};
  	$debug= $self->{debug} if $self->{debug};
   }
***************
*** 342,346 ****
--- 371,414 ----
  }

+ ## for IxBase
+ ## my($tag,$forg)= $self->orgid2name($dft,'organism_id', $dft->{organism_id});

+ sub orgid2name {
+ 	my $self= shift;
+ 	my ($ob, $k,$v)= @_;
+   if ($k =~ /_id$/) {
+    my $ft= $ob->{handler}->{idhash}{$v};
+    if ($ft) {
+       ##_debugObj("organism",$ft);
+       #my @attr= $ft->getattrs('genus','species'); 
+   	  my @attr=( $ft->{'genus'}, $ft->{'species'} );
+       if (@attr) { 
+         # $v= join('_',@attr); 
+   	    $v= substr(lc($ft->{'genus'}),0,1) . substr(lc($ft->{'species'}),0,3);
+         $k =~ s/_id$/_name/; 
+         }
+       
+       
+   	  # my $gns= $ft->{'genus'}; my $spp= $ft->{'species'};
+       #if ($gns || $spp) { 
+       #  $v= $gns .'_' . $spp;  ## need configurable abbrevs. of species !
+       #  $k =~ s/_id$/_name/;
+       #  }
+       }
+     }
+   return ($k,$v);
+ }
+ 
+ sub _debugObj
+ {
+   my ($name,$obj)= @_;
+   require Data::Dumper;
+   $Data::Dumper::Maxdepth = 3;
+   my $dd = Data::Dumper->new([$obj]); $dd->Terse(1);
+   $dd->Seen( [ { '*handler' => 1} ]);
+   #$dd->Maxdepth(3); 
+   
+   print STDERR "DEBUG obj: $name=",  $dd->Dump(),"\n";
+ }

  # sub getMiscFeatureLoc {
***************
*** 370,377 ****
--- 438,463 ----
  =cut

+ use vars qw/ $lastob $parob /;
+ 
  sub handleObj {  
  	my $self = shift;
  	my $depth= shift;
   	my $ob   = shift;
+   eval { $self->handleObj_base($depth,$ob,@_); };
+   if ($@) {
+     my $err=$@;
+     my $lob= (ref($lastob)) ? $lastob->name() : 'null'; # $ob->name();
+     warn "#>> handleObj lastob=$lob; lev=$depth; error: $err\n";
+     _debugObj("object",$ob);
+     ##_debugObj("last-object",$lastob);
+ ## DEBUG obj: object={}
+     }
+   $lastob= $ob;
+ }
+ 
+ sub handleObj_base {  
+ 	my $self = shift;
+ 	my $depth= shift;
+  	my $ob   = shift;

   	my $parsestate= shift; #? skip these
***************
*** 391,394 ****
--- 477,481 ----
   	my $ftd= $self->{ftd};
   	my $evd= $self->{evd};
+  	##my $synt= $self->{synt}; # like evd; but needs sep. acode subrec
    my $aaresid= $self->{aaresid}; #? keep in $ftd
    my $mrnaresid= $self->{mrnaresid}; #? keep in $ftd
***************
*** 401,404 ****
--- 488,492 ----
        delete $self->{ftd};
        delete $self->{evd};
+       delete $self->{synt};
        $ftd= $self->{ftd}= {}; #?? clear

***************
*** 406,409 ****
--- 494,498 ----
        $mrnaresid= $self->{mrnaresid}= {}; #?? clear
        $evd= $self->{evd}= {}; #?? clear
+       $self->{synt}= {}; #?? clear
        $trn= $ftd->{trn}= {};
        $self->{cdsloc}= '';
***************
*** 520,523 ****
--- 609,613 ----
     ($otag =~ /^feature$/ && $depth == 2) && do {

+       my $ftype= 0;
        if ($otype  =~ /^(protein)$/ ) { #&& $depth == 2 ?! need parent.otype == mRNA
          # $otype= 'CDS';
***************
*** 546,551 ****
          }

!       elsif ($otype !~ /^(exon|chromosome_arm)$/) {
!         warn "MISSED feature=$otype [lev=$depth] ".$obname." \n"; # if $debug;
          }

--- 636,647 ----
          }

!       elsif ($otype =~ m/^(intron|CDS)$/) { $ftype= -1; }
!         ## from dpse chado reporting db:
!         ## CDS = redundant w/ protein; 
!         ## intron = redundant w/ getlocationString('intron')
!         
!       elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; }
!       else { ##if ($otype !~ /^(exon|chromosome_arm|ultra_scaffold)$/)   # == $ignore_feature_types
!         warn "MISSED feature=$otype [lev=$depth] ".$obname.','.$ob->{id}." \n";  
          }

***************
*** 553,556 ****
--- 649,708 ----
  			};  

+     ## jan05 - for dpse; see syntAcode
+   ( $otag =~ /^feature$/ && $depth == 1 
+    && $otype =~ m/^(syntenic_region|orthologous_region)$/ ) 
+    && do {
+     ## has feature.feature. substructure
+     last SWITCH if ($ob->get('is_internal') == 1);
+     
+     my $synt= $self->{synt};
+     
+     my $anid= $obuniqid; ##$ob->get('id'); # feature id
+     my $anh= $synt->{$anid}= {};
+     ##unless(defined $anh) { $synt->{$anid}= $anh= {}; }
+ 
+     #? my $obuniqid= $ob->{uniquename} || undef;
+     $anh->{NAM} = $obuniqid;
+     $anh->{TYPE}= $otype if ($otype);
+     
+     # my($prg,$pdb)=("synteny","dummy"); #$anh->{PROGRAM},$anh->{DATABASE});
+     # warn "# synt $otag:$otype = $anid \n" if ($debug);
+       
+       ## not right ...
+     ##my @fts= $ob->getfeats('feature');
+     ##if (@fts) {
+     ##my $ft= shift @fts; #  1 top level, 2 or 3 subfeature 
+     {
+     my $ft= $ob;
+     my $dt= cleandate($ft->{timelastmodified}); 
+     $anh->{DT} = "$dt" if $dt;   
+ 
+     my @fts= $ft->getfeats('feature'); # find synteny components
+     foreach my $dft (@fts) {
+     
+       # need  spp/chr:location from each feature here ??
+       my $fchr= $dft->{uniquename} || undef; #? $dft->name();
+       my $floc= $ft->getlocation( $dft->{id});
+ 
+       # my($tag,$forg)= $dft->id2name('organism_id', $dft->{organism_id});
+       my($tag,$forg)= $self->orgid2name($dft,'organism_id', $dft->{organism_id});
+       
+       $floc= $fchr . ':' . $floc; 
+       $floc= $forg . '/' . $floc if ($forg);
+       # $floc= $forg .'/' . $fchr . ':' . $floc; 
+       
+       $anh->{mRNA}     .= $floc.$RECSEP; 
+       $anh->{mRNA_chr} .= $fchr.$RECSEP; #??
+       
+       # my($tag,$type)= $dft->id2name('type_id',$dft->{type_id});
+       # $anh->{mRNA_type} .= $type.$RECSEP if ($type);
+ 
+       }
+     }
+     
+     $dolist= $doatts= 0; # no more recurse ??
+     last SWITCH; 
+     };  
+     

     ($otag =~ /^feature$/ && $depth == 1) && do {  
***************
*** 566,570 ****
        elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; }
        else {
!         warn "MISSED feature=$otype [lev=$depth] ".$obname." \n";  
          }

--- 718,722 ----
        elsif ($otype =~ m/^($ignore_feature_types)$/) { $ftype= -1; }
        else {
!         warn "MISSED feature=$otype [lev=$depth] ".$obname.','.$ob->{id}." \n";  
          }

***************
*** 614,617 ****
--- 766,772 ----
          if ($aaloc) {
            $ob->insertlocations($aaloc, $locs); ## do utr math also
+           ## ^^ handles transspliced items, and also adds
+           ## $loca->{five_prime_UTR}= \@head;   $loca->{three_prime_UTR}= \@tail; 
+ 
            $loc = $ob->getlocationString($aaloc); # if $aaloc;

***************
*** 658,662 ****
  # 			};  

! 
      ($otag =~ /^feature_dbxref$/ && $depth < 4) && do {
        last SWITCH if ($ob->get('is_internal') == 1);  
--- 813,817 ----
  # 			};  

!       
      ($otag =~ /^feature_dbxref$/ && $depth < 4) && do {
        last SWITCH if ($ob->get('is_internal') == 1);  
***************
*** 672,676 ****
          # warn "DEBUG: feature_dbxref=$db:$attr, $dbattr\n" if $debug;
          if ($depth == 1) { 
!           if ($iscur == 0) { $ftd->{ID2} .= $attr .$RECSEP; }
            else {
              ## patch here? to replace FBgn 2ndary ids w/ primary 
--- 827,833 ----
          # warn "DEBUG: feature_dbxref=$db:$attr, $dbattr\n" if $debug;
          if ($depth == 1) { 
!           if ($attr =~ /^(SO|GO):/) {  }
!           elsif ($attr =~ /^(X|2L|2R|3L|3R|4)$/) {  }
!           elsif ($iscur == 0) { $ftd->{ID2} .= $attr .$RECSEP; }
            else {
              ## patch here? to replace FBgn 2ndary ids w/ primary 
***************
*** 682,687 ****
              elsif ($attr =~ /^C[GR]\d/ ) { $ftd->{CGSYM}= $attr; }
              elsif ($attr =~ /^TE\d/ ) { $ftd->{CGSYM} = $attr; } 
-             elsif ($attr =~ /^(SO|GO):/) {  }
-             elsif ($attr =~ /^(X|2L|2R|3L|3R|4)$/) {  }
              else { $notdone=1; }
              }
--- 839,842 ----
***************
*** 694,697 ****
--- 849,854 ----
          if ($notdone==1) { 
            warn "MISSED feature_dbxref=$db:$attr\n"; # if $debug;
+           ## dpse chadoxml:
+           ## MISSED feature_dbxref=GB:AY190949
            }
          }
***************
*** 708,713 ****
        if (%fbgn2id && $attr =~ /FBgn/) { $attr= $fbgn2id{$attr} || $attr; }

!         ## FIX for TE000 == FBti000
!          if ($iscur == 0) { $ftd->{ID2} .= $dbattr .$RECSEP; }
        elsif ($attr =~ /^C[GR]\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }
        elsif ($attr =~ /^TE\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }#or ANID?
--- 865,871 ----
        if (%fbgn2id && $attr =~ /FBgn/) { $attr= $fbgn2id{$attr} || $attr; }

!       if ($attr =~ /^(X|2L|2R|3L|3R|4)$/) {  }
!       elsif ($attr =~ /^(SO|GO):/) {  }
!       elsif ($iscur == 0) { $ftd->{ID2} .= $dbattr .$RECSEP; }
        elsif ($attr =~ /^C[GR]\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }
        elsif ($attr =~ /^TE\d/ && $db =~ m/FlyBase|Gadfly/i) { $ftd->{CGSYM} = $attr; }#or ANID?
***************
*** 715,720 ****
--- 873,888 ----
        elsif ($attr =~ /^(FBgn|FBti)/ && $db =~ m/FlyBase/i) { $ftd->{GID}  = $attr; }
        elsif ($attr =~ /^GO/ && $db =~ m/GO/i) {  }
+       elsif ($attr =~ /^FB\w\w\d+/ || $db =~ /^GB/) {
+         my @sym= ( $ftd->{DBX} );
+         unless ( $self->checkForVal($attr,@sym) ) { $ftd->{DBX} .= $dbattr .$RECSEP; }
+         }
        else {
          warn "MISSED dbxref=$db,$attr,$dbattr\n"; # if $debug;
+         ## nov04 - add these
+         # MISSED dbxref=FlyBase,FBpp0074965,FlyBase:FBpp0074965
+         # MISSED dbxref=FlyBase,FBtr0075204,FlyBase:FBtr0075204
+         #MISSED dbxref=GB_protein,AAN11697.1,GB_protein:AAN11697.1
+         #MISSED dbxref=GB,CG10160,GB:CG10160
+ 
          #MISSED dbxref=GO,GO:0008017,GO:0008017 ?? from what? -- drop
          #MISSED dbxref=FlyBase,CG9884,FlyBase:CG9884 -- r3.2.0-march replaces Gadfly
***************
*** 758,761 ****
--- 926,989 ----

+       ## jan05 - need this for putative_ortholog_of relation
+     ($otag =~ /^feature_relationship$/ && $depth < 4) && do {
+       my $notdone= 1;
+       $dolist= $doatts= 0; # no more recurse ??
+       last SWITCH if ($ob->get('is_internal') == 1);  
+ 
+       my ($tag,$rtype) = $ob->id2name('type_id', $ob->{type_id});
+       last SWITCH if ($rtype =~ /^(partof|producedby)$/);
+       
+       my $anid= $ob->get('object_id');  
+       my $top = $ob->{parnode}; ## not there ##$ob->{handler}->parent_node($depth); 
+       $top= $parob unless($top);
+       $top = $ob->topnode() unless($top);
+       my $anft= $top->getfeatbyid( $anid);
+ 
+       my @fts= $ob->getfeats('feature'); # find relationship components; 1 only?
+       push(@fts, $anft) if $anft;
+       my $attr='';
+       eval {
+       foreach my $dft (@fts) {
+         my($ftag,$ftype,$forg);
+         my $nm= $dft->name(); ## uniquename probably
+         ($ftag,$forg) = $self->orgid2name($dft,'organism_id', $dft->{organism_id});
+         ($ftag,$ftype)= $dft->id2name('type_id', $dft->{type_id});
+         $attr= "$forg/$ftype:$nm";
+         
+         $ftd->{ORTHO} .= "$rtype=$attr".$RECSEP ;  ## fixme NOT right field for this
+         ##$notdone= 0; 
+         }
+       };  
+       warn "#>> feature_relationship error: $@\n" if $@;
+   
+       warn "MISSED feature_relationship=$attr id=$anid ($tag:$rtype) [lev=$depth]\n" if ($notdone); #  && $debug
+       # _debugObj("feature_relationship",$ob);
+ 
+ # MISSED feature_relationship= ,type_name,partof [lev=2]
+ # MISSED feature_relationship= ,type_name,producedby [lev=2]
+ 
+ # chipmunk% ./chado2acode.sh tmpd/test.acode CH379060.top.xml
+ # ToAcode( CH379060.top.xml ).....
+ # IxReadSax parse <CH379060.top.xml>
+ # MISSED feature_relationship= ,type_name,partof [lev=1]
+ # MISSED feature=mRNA [lev=2] GA15475-RA 
+ # MISSED feature_relationship= ,type_name,putative_ortholog_of [lev=1]
+ # MISSED feature=gene [lev=2] CG2822 
+ # MISSED feature_relationship= ,type_name,partof [lev=1]
+ # MISSED feature=mRNA [lev=2] GA21028-RA 
+ # MISSED feature_relationship= ,type_name,putative_ortholog_of [lev=1]
+ # MISSED feature=gene [lev=2] CG8372 
+ 
+ # CMT|comment=Range of putative ortholog from tblastn was extended 
+ # |to cover the gene prediction
+ # |partof=/mRNA:GA15475-RA
+ # |putative_ortholog_of=/gene:CG2822
+ 
+ 			last SWITCH; 
+       };
+       
+       
+       
      ($otag =~ /^featureprop$/ && $depth<3) && do {
        last SWITCH if ($ob->get('is_internal') == 1);
***************
*** 917,925 ****
  			};  

      ($otag =~ /^alignment_evidence$/) && do {
        ## has feature.feature.feature. ... substructure
        last SWITCH if ($ob->get('is_internal') == 1);
!        
        my $anid= $ob->get('analysis_id');
        my $anh= $evd->{$anid};
--- 1145,1193 ----
  			};  

+ #--- nov04, dmel rel4 chado evidence types (prg,pdb); all type =  match
+ ## Chado-FBan EVD PRG_DB list - Nov 04
+ # egrep '^(PRG|DB)\b' fban-r4.acode | \
+ # perl -ne'$p=$1 if(/PRG\|(\S+)/); if(/DB\|(\S+)/){$d=$1;$f{"${p}_${d}"}++;}\
+ # END{foreach (sort keys %f){print"$_\t$f{$_}\n";}}'
+ # 
+ # assembly_path   6076
+ # blastx_masked_aa_SPTR.dmel      11754
+ # blastx_masked_aa_SPTR.insect    5872
+ # blastx_masked_aa_SPTR.othinv    7436
+ # blastx_masked_aa_SPTR.othvert   5738
+ # blastx_masked_aa_SPTR.plant     3649
+ # blastx_masked_aa_SPTR.primate   6578
+ # blastx_masked_aa_SPTR.rodent    6600
+ # blastx_masked_aa_SPTR.worm      5376
+ # blastx_masked_aa_SPTR.yeast     2509
+ # genie_masked_dummy      9008
+ # genscan_masked_dummy    11360
+ # promoter_dummy  3542
+ # repeat_runner_seg_dummy 1007
+ # repeatmasker_dummy      1377
+ # sim4_na_ARGs.dros       740
+ # sim4_na_ARGsCDS.dros    716
+ # sim4_na_DGC.in_process.dros     2351   == EST/cDNA
+ # sim4_na_HDP_RNAi.dmel   50
+ # sim4_na_HDP_mRNA.dmel   66
+ # sim4_na_dbEST.diff.dmel 7139 == EST/cDNA
+ # sim4_na_dbEST.same.dmel 8100 == EST/cDNA
+ # sim4_na_gadfly.dros.RELEASE2    11076
+ # sim4_na_gb.dmel 8236
+ # sim4_na_gb.tpa.dmel     236
+ # sim4_na_smallRNA.dros   49
+ # sim4_na_transcript.dmel.RELEASE31       11710
+ # sim4_na_transcript.dmel.RELEASE32       11824
+ # sim4tandem_na_gb.dmel   6700
+ # tRNAscan-SE_dummy       265
+ # tblastx_masked_na_dbEST.insect  5412
+ # tblastxwrap_masked_na_baylorf1_scfchunk.dpse    11480
+ # tblastxwrap_masked_na_scf_chunk_agambiae.fa     8695
+ 

      ($otag =~ /^alignment_evidence$/) && do {
        ## has feature.feature.feature. ... substructure
        last SWITCH if ($ob->get('is_internal') == 1);
!       
        my $anid= $ob->get('analysis_id');
        my $anh= $evd->{$anid};
***************
*** 932,936 ****
            $anft= $top->getfeatbyid( $anid);
            }
-           
          if ($anft) {
            $anh->{PROGRAM}= $anft->{program}; # program=blastx_masked
--- 1200,1203 ----
***************
*** 940,949 ****
            $anh->{PROGRAM}= $aname; 
            }
         # warn "# evd $otag: $anid = $anh->{PROGRAM} \n" if ($debug);
        }

        $dolist= $doatts= 0; # no more recurse ??
        last SWITCH
!         if ($anh->{PROGRAM} eq 'locator' && $anh->{DATABASE} eq 'cytology');

        my @fts= $ob->getfeats('feature');
--- 1207,1219 ----
            $anh->{PROGRAM}= $aname; 
            }
+           
         # warn "# evd $otag: $anid = $anh->{PROGRAM} \n" if ($debug);
        }

+       my($prg,$pdb)=($anh->{PROGRAM},$anh->{DATABASE});
+       
        $dolist= $doatts= 0; # no more recurse ??
        last SWITCH
!         if ($prg eq 'locator' && $pdb eq 'cytology');

        my @fts= $ob->getfeats('feature');
***************
*** 961,964 ****
--- 1231,1239 ----
          next if ($dft->{id} eq $ftd->{srcid});

+         my($tag,$type)= $dft->id2name('type_id',$dft->{type_id});
+         $anh->{TYPE}= $type if ($type);
+         ## jun04 - need to filter out apollo dupl. evidence for
+         ## type = transposable_element_insertion_site
+ 
          my $dbname= $dft->name();
          my $dbx= $dbname; # default
***************
*** 984,999 ****
          # $anh->{mRNA_dbx2} .= $dbname .';' ; 

!         my($tag,$type)= $dft->id2name('type_id',$dft->{type_id});
!         $anh->{TYPE}= $type if ($type);
!   
!           ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros
!         if ($type eq 'EST' && $anh->{DATABASE} =~ /\.dros/) { 
            my $v= $dbname; $v =~ s/[:_\.].*//;
            $ftd->{EST} .= $v .$RECSEP unless($ftd->{EST} =~ m/$v$RECSEP/); 
            }
!         elsif ($type eq 'cDNA_clone') { 
!           my $v= $dbname;  
            $ftd->{CDNA} .= $v .$RECSEP unless($ftd->{CDNA} =~ m/$v$RECSEP/);  
            }
          elsif ($type eq 'oligonucleotide') { 
            my $v= $dbname; $v =~ s/_at_\d+/_at/;
--- 1259,1293 ----
          # $anh->{mRNA_dbx2} .= $dbname .';' ; 

!         ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros
!         # dmel r4 -- need to recode blast PRG/DB types to get EST/cDNA
!         # sim4_na_EST.all_nr.dros=EST -- old
!         # sim4_na_dbEST.diff.dmel=EST
!         # sim4_na_dbEST.same.dmel=EST
!         # sim4_na_cDNA.dros=cDNA -- old
!         # sim4_na_adh.cDNAs.dros=cDNA -- old
!         # sim4_na_DGC.dros=DGC
!         # sim4_na_DGC.in_process.dros=DGC
!         
!         if ($prg =~ m/(sim4|groupest)/ && $type =~ m/(match|alignment)/) { #? other types?
!           if ($pdb=~/(EST)/) { $type='EST'; }
!           elsif ($pdb=~/(cDNA)/) { $type='cDNA_clone'; }
!           elsif ($pdb=~/(DGC)/) { $type='cDNA_clone'; } # or separate to DGC field?
!           #? change acode type?
!           # $anh->{TYPE}= $type if ($type);
!           }
! 
!         ## EST only if DB|na_EST.all_nr.dros and/or? DB|na_DGC.dros
!         if ($type eq 'EST' && $pdb =~ /\.(dmel|dros)/) { 
            my $v= $dbname; $v =~ s/[:_\.].*//;
            $ftd->{EST} .= $v .$RECSEP unless($ftd->{EST} =~ m/$v$RECSEP/); 
            }
!         elsif ($type =~ /cDNA/) { 
!           my $v= $dbname;  $v =~ s/[:_\.].*//;
            $ftd->{CDNA} .= $v .$RECSEP unless($ftd->{CDNA} =~ m/$v$RECSEP/);  
            }
+         elsif ($type =~ /DGC/) { 
+           my $v= $dbname;  $v =~ s/[:_\.].*//;
+           $ftd->{DGC} .= $v .$RECSEP unless($ftd->{DGC} =~ m/$v$RECSEP/);  
+           }
          elsif ($type eq 'oligonucleotide') { 
            my $v= $dbname; $v =~ s/_at_\d+/_at/;
***************
*** 1016,1030 ****
--- 1310,1328 ----
    ## includes dbxref , featureprop .. ?
    if ( $doatts && defined $ob->{attrlist} ) {
+     my $lastpar= $parob; $parob= $ob;
      foreach (@{$ob->{attrlist}}) { 
        # $_->handleObj(1+$depth, $self);  
        $self->handleObj(1+$depth, $_);  
        }
+     $parob= $lastpar;
      }

    if ($dolist && defined $ob->{list} ) {
+     my $lastpar= $parob; $parob= $ob;
      foreach (@{$ob->{list}}) { 
        # $_->handleObj(1+$depth, $self);  
        $self->handleObj(1+$depth, $_);  
        }
+     $parob= $lastpar;
      }
    ## recursion end ......
***************
*** 1033,1046 ****
    {
      my @elist= values( % $evd );
      $self->{defline}='';   
!     my $outh= $self->{outh};
!     my $ftout= $self->{ftout};
      $ftd->{VERS}= $self->{VERS}; # data version from where?
!     $self->fbanAcode( $outh, $ftd, $ftout,\@elist); ##, $FTOUT, \%ca);

      my $chr= $ftd->{ARM};
      my $defline= $self->{defline}; # fbanAcode() makes this
      my $type='protein'; # $ftd->{TYPE} || << not 'gene'
!     $outh= $self->{faout};
      my $resid= $aaresid;
      if (scalar %$resid && $outh) {
--- 1331,1345 ----
    {
      my @elist= values( % $evd );
+     my @synt = values( % {$self->{synt}} );
      $self->{defline}='';   
!     ##my $outh= $self->{outh};
!     ##my $ftout= $self->{ftout};
      $ftd->{VERS}= $self->{VERS}; # data version from where?
!     $self->fbanAcode1(  $ftd,  \@elist, \@synt); 

      my $chr= $ftd->{ARM};
      my $defline= $self->{defline}; # fbanAcode() makes this
      my $type='protein'; # $ftd->{TYPE} || << not 'gene'
!     my $outh= $self->{faout};
      my $resid= $aaresid;
      if (scalar %$resid && $outh) {
***************
*** 1127,1131 ****
  sub fbanAcode {
  	my $self = shift;
!   my($outh, $ftd, $ftout, $ca)= @_; # array refs of keys, values
    my %h= %$ftd;

--- 1426,1443 ----
  sub fbanAcode {
  	my $self = shift;
!   my($outh, $ftd, $ftout, $evd)= @_; # array refs of keys, values
!   $self->{outh}= $outh;
!   $self->{ftout}= $ftout;
!   return $self->fbanAcode1($ftd, $evd);
! }
! 
! sub fbanAcode1 {
! 	my $self = shift;
!   my( $ftd, $evd, $synt)= @_; # array refs of keys, values
! 
!   my $outh= $self->{outh};
!   my $outteh= $self->{outteh};
!   my $ftout= $self->{ftout};
!   
    my %h= %$ftd;

***************
*** 1167,1170 ****
--- 1479,1485 ----
  BLOC|17099533..17099534
  }
+ 
+   FIXME - aug04, need to get rid of TE/FBti -> FBan + FBTI_IDBASE hack
+      .. create separate FBan-te.acode file? index by fbti or TE(num) 
  =cut 

***************
*** 1172,1176 ****
      if ($uniqid !~ /FBan\d/) {
        if ($uniqid =~ m/^C[GR]0*(\d+)/) { $uniqid= sprintf("FBan%07d",$1); }
!       elsif ($uniqid =~ m/^TE0*(\d+)/) { $uniqid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); }
        }
      if ($uniqid ne $anid) { push(@id2,$anid) if $anid; $anid= $uniqid; }
--- 1487,1494 ----
      if ($uniqid !~ /FBan\d/) {
        if ($uniqid =~ m/^C[GR]0*(\d+)/) { $uniqid= sprintf("FBan%07d",$1); }
!       elsif ($uniqid =~ m/^TE0*(\d+)/) { 
!         $uniqid= sprintf("FBti%07d", $1 ); $outh= $self->{outteh};
!         ##was ## $uniqid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); 
!         }
        }
      if ($uniqid ne $anid) { push(@id2,$anid) if $anid; $anid= $uniqid; }
***************
*** 1181,1195 ****
      ## no - either put TE into separate FBan-te.acode, or revise FBanID num
      if ($gid !~ /FB\w\w\d+/) { $gid = $cgsym; $gid =~ s/TE/FBti/; }  
!     if ($anid !~ /FBan\d/) {
!        if ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); }
!        elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); }
         }

- #     $anid = $cgsym; 
- #     if ($cgsym =~ m/TE0*(\d+)/) { ##? is TE/FBti id distinct from FBan id num range? NO
- #       $anid = $1;
- #       $anid = sprintf("FBan%07d",( FBTI_IDBASE + $anid)); ## this is bad; conflicts with CGnn/FBan
- #       }
-       
      $cgsym= $gsym;
      $gensr  = "INSR\n{\n"; #?? INSR = TIR subrec ? will this TIRecord cause problems?
--- 1499,1514 ----
      ## no - either put TE into separate FBan-te.acode, or revise FBanID num
      if ($gid !~ /FB\w\w\d+/) { $gid = $cgsym; $gid =~ s/TE/FBti/; }  
! 
!     if ($anid !~ /FBti\d/) {
!        if ($gid =~ m/^FBti0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); }
!        elsif ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); }
!        elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBti%07d", $1 ); }
!        $outh= $self->{outteh}; # put in separate acode file
         }
+ #     if ($anid !~ /FBan\d/) {
+ #        if ($anid =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); }
+ #        elsif ($cgsym =~ m/^TE0*(\d+)/) { $anid= sprintf("FBan%07d",( FBTI_IDBASE + $1)); }
+ #        }

      $cgsym= $gsym;
      $gensr  = "INSR\n{\n"; #?? INSR = TIR subrec ? will this TIRecord cause problems?
***************
*** 1326,1341 ****
  		print $outh "SQLEN|$sqlen\n" if $sqlen; ##$h{SQLEN};
  		print $outh "GO|$go\n" if $go;
- 		print $outh "SYN|".join("\n|",split(/$RECSEP/,$h{SYN}))."\n" if $h{SYN};

  		push( @id2, split(/$RECSEP/,$h{ID2})) if $h{ID2};
  	  print $outh "ID2|".join("\n|",@id2)."\n" if @id2;

! 		print $outh "CDNA|".join("\n|",split(/$RECSEP/,$h{CDNA}))."\n" if $h{CDNA};
! 		print $outh "EST|".join("\n|",split(/$RECSEP/,$h{EST}))."\n" if $h{EST};
! 		print $outh "AFFY|".join("\n|",split(/$RECSEP/,$h{AFFY}))."\n" if $h{AFFY};

-       #?? split these, others on /;/ ??
- 		print $outh "PEPST|".join("\n|",split(/$RECSEP/,$h{PEPST}))."\n" if $h{PEPST};
- 		 
      my $gcm = join("\n|", split(/$RECSEP/,$h{CMT}));
  	  $gcm =~ s/^\s*//; 
--- 1645,1656 ----
  		print $outh "SQLEN|$sqlen\n" if $sqlen; ##$h{SQLEN};
  		print $outh "GO|$go\n" if $go;

  		push( @id2, split(/$RECSEP/,$h{ID2})) if $h{ID2};
  	  print $outh "ID2|".join("\n|",@id2)."\n" if @id2;

!     foreach my $dkey (qw(SYN ORTHO EST CDNA DGC AFFY PEPST)) {
! 		print $outh "$dkey|".join("\n|",split(/$RECSEP/,$h{$dkey}))."\n" if $h{$dkey};
! 		}

      my $gcm = join("\n|", split(/$RECSEP/,$h{CMT}));
  	  $gcm =~ s/^\s*//; 
***************
*** 1440,1450 ****
  	  }

! 	  if (ref($ca) =~ /ARRAY/) { $self->evidAcode( $outh, $ca, $ftout, $arm ); }
! #   if ($ca && $ca->{$h{CGSYM}}){ evidAcode( $outh, $ca->{$h{CGSYM}}  ); }

  	  my %did= map { $_,1; } 
  	    qw/ANID ID ID2 GID GSYM GNLEN ARM CLOC CGSYM SCAF BLOC mRNA CTSYM 
!         DT AALEN AANAM CDS SQLEN GO CLOCC PEPST PEPCMT CMT SYN
!         AFFY EST CDNA TYPE ENAM srcid residues trns trn aaresid mrnaresid/;
      foreach my $k (sort keys %h) {
        if($k =~ /\w/ && !$did{$k}) { print $outh "$k|$h{$k}\n"; }
--- 1755,1768 ----
  	  }

! 	  if (ref($evd) =~ /ARRAY/) { $self->evidAcode( $outh, $evd, $ftout, $arm ); }
! #   if ($evd && $evd->{$h{CGSYM}}){ evidAcode( $outh, $evd->{$h{CGSYM}}  ); }
! 
!     if (ref($synt) =~ /ARRAY/) { $self->syntAcode( $outh, $synt, $ftout, $arm ); }

  	  my %did= map { $_,1; } 
  	    qw/ANID ID ID2 GID GSYM GNLEN ARM CLOC CGSYM SCAF BLOC mRNA CTSYM 
!         DT AALEN AANAM CDS SQLEN GO CLOCC PEPCMT CMT
!         SYN ORTHO EST CDNA DGC AFFY PEPST
!         TYPE ENAM srcid residues trns trn aaresid mrnaresid/;
      foreach my $k (sort keys %h) {
        if($k =~ /\w/ && !$did{$k}) { print $outh "$k|$h{$k}\n"; }
***************
*** 1456,1459 ****
--- 1774,1820 ----

+ ## jan05 add
+ # SYNT 
+ # {
+ # CLA|syntenic_region
+ # NAM|syntenic_block:240
+ # BLOC|dmel/2L:3458904..3859790
+ # BLOC|dpse/4_group3:4830267..5271058
+ # }
+ 
+ sub syntAcode {
+ 	my $self = shift;
+ 	my($outh, $eref, $ftout, $arm)= @_;  
+   my @kv= @$eref;  ## array of evd hash
+   
+   foreach my $v (@kv) {
+     my %h= %$v; 
+     my $earm= $h{ARM} || $arm;
+     my $type= $h{TYPE};
+     my $name= $h{NAM};
+ 
+     print $outh "SYNT\n{\n";
+     print $outh "NAM|$name\n";
+     print $outh "CLA|$type\n" if $type;
+     print $outh "DT|$h{DT}\n" if $h{DT};
+     
+     my @bl= split(/$RECSEP/,$h{mRNA}); # many of these per program/db/type
+     my @chrs= split(/$RECSEP/,$h{mRNA_chr});  
+ 
+     foreach my $bl (@bl) {
+       my $id= '';
+       my $chr= shift @chrs; # should match mRNA list
+       my $blwrap = wrapLong($bl); #>> "$spp/$chr:$bl"
+ 
+       print $outh "BLOC|$blwrap\n" if $blwrap;
+       
+       ## only printFeature for this organism !
+       # next unless($chr eq $earm); #?
+       # $self->printFeature($ftout, $earm, $type, $name, '', $bl, $id, '') 
+       #  if ($bl && $ftout && !$nofeat);
+       }
+     print $outh "}\n";
+     }
+ }

  sub evidAcode {
***************
*** 1607,1611 ****
  	open(AA,$aaf) or die "$aaf";
  	open(IDX,">$aaf.idx") or die ">$aaf.idx";
! 	open(IDX2,">$aaf.$id2tag.idx") or die ">$aaf.$id2tag.idx";
  	my ($idnum, $id2num, $at0, $at, $start, $end)= (0,0,0,0,0,0);
  	while(<AA>) {
--- 1968,1972 ----
  	open(AA,$aaf) or die "$aaf";
  	open(IDX,">$aaf.idx") or die ">$aaf.idx";
! 	if ($id2tag) { open(IDX2,">$aaf.$id2tag.idx") or die ">$aaf.$id2tag.idx"; }
  	my ($idnum, $id2num, $at0, $at, $start, $end)= (0,0,0,0,0,0);
  	while(<AA>) {
***************
*** 1623,1627 ****
  			$idnum= $1;
  			}
! 		if (/^ID\|${id2tag}0*(\d+)/) {
  			$id2num= $1;
  			}
--- 1984,1988 ----
  			$idnum= $1;
  			}
! 		if ($id2tag && /^ID\|${id2tag}0*(\d+)/) {
  			$id2num= $1;
  			}
***************
*** 1630,1634 ****
  	putIndex(*IDX,$start,$end,$idnum) if ($idnum>0);
  	putIndex(*IDX2,$start,$end,$id2num) if ($id2num>0);
! 	close(AA); close(IDX);close(IDX2);
  	warn "# indexAcode done\n" if $debug;
  }
--- 1991,1997 ----
  	putIndex(*IDX,$start,$end,$idnum) if ($idnum>0);
  	putIndex(*IDX2,$start,$end,$id2num) if ($id2num>0);
! 	close(AA); 
! 	close(IDX);
! 	close(IDX2) if ($id2tag);
  	warn "# indexAcode done\n" if $debug;
  }

[Gmod-schema-cmts] schema/XMLTools/ChadoSax/src/org/gmod/chado/ix IxReadSax.pm,1.4,1.5 ToAcode.pm,1.

[Gmod-schema-cmts] schema/XMLTools/ChadoSax/src/org/gmod/chado/ix IxReadSax.pm,1.4,1.5 ToAcode.pm,1.4,1.5