[Nelsonlab-cmts] libnelson/Pg/celsius/bin profile.pl,1.5,1.6

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/libnelson/Pg/celsius/bin
In directory sumo.genetics.ucla.edu:/tmp/cvs-serv3867/bin

Modified Files:
	profile.pl 
Log Message:
updates to use torso.  also added "-m" option (untested) to allow specification
of full profile, rather than dimension coordinates to create matrix from db
dynamically.

Index: profile.pl
===================================================================
RCS file: /cvsroot/libnelson/Pg/celsius/bin/profile.pl,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** profile.pl	3 Feb 2006 01:28:58 -0000	1.5
--- profile.pl	11 Mar 2006 02:56:02 -0000	1.6
***************
*** 7,10 ****
--- 7,11 ----
  my $cel_file;
  my $element_file;
+ my $matrix_file;
  my $verbose;
  my $help;
***************
*** 13,21 ****
              "cel|c=s"     => \$cel_file,
              "element|e=s" => \$element_file,
              "verbose|v"   => \$verbose,
              "help|h"      => \$help
  );

! if ( ! $cel_file or ! $element_file or $help ) {
    print <<"USAGE";
  Usage: $0 [-v] -c <file of SN accessions> -e <file of probeset identifiers>
--- 14,23 ----
              "cel|c=s"     => \$cel_file,
              "element|e=s" => \$element_file,
+             "matrix|m=s"  => \$matrix_file,
              "verbose|v"   => \$verbose,
              "help|h"      => \$help
  );

! if ( !($matrix_file or ($cel_file and $element_file)) or $help ) {
    print <<"USAGE";
  Usage: $0 [-v] -c <file of SN accessions> -e <file of probeset identifiers>
***************
*** 24,31 ****
  Celsius warehouse.

! This program takes two inputs:
    1) a list of SN identifiers as provisioned by the celsius CEL warehouse
    2) a list of Affymetrix probeset identifiers.

  $0 caculates Euclidean distance in a P-dimensional space where P is the number
  of probesets in the probeset identifier file.  The median values of all SN
--- 26,39 ----
  Celsius warehouse.

! This program can operate in two modes.
! 
! The first mode requires the -c and -e options, taking two inputs:
    1) a list of SN identifiers as provisioned by the celsius CEL warehouse
    2) a list of Affymetrix probeset identifiers.

+ The second mode requires the -m options, taking one input:
+   * a matrix, columns as chip IDs (your own), rows as Affymetrix probeset
+     identifiers.
+ 
  $0 caculates Euclidean distance in a P-dimensional space where P is the number
  of probesets in the probeset identifier file.  The median values of all SN
***************
*** 45,93 ****
  }

! my $dbh = DBI->connect('dbi:Pg:dbname=modulus;host=soleus.ctrl.ucla.edu','allenday','');
! my $element_sth = $dbh->prepare('SELECT element_id FROM element WHERE name = ?');
! my $signal_sth = $dbh->prepare("SELECT cel.db || ':' || cel.accession AS accession, element.name, result.signal FROM cel, element, result WHERE cel.cel_id = result.cel_id AND element.element_id = result.element_id AND element.element_id = ?");

  my @cel;
- print STDERR "reading cel file..." if $verbose;
- open(F, $cel_file);
- my @cel = <F>;
- chomp @cel;
- close(F);
- print STDERR "done\n" if $verbose;
- 
  my @element;
! print STDERR "reading element file..." if $verbose;
! open(F, $element_file);
! my @element = <F>;
! chomp @element;
! close(F);
! print STDERR "done\n" if $verbose;

  my %result = ();
  my %percentile = ();
- my %profile = ();
- my %element = ();
  my %sample = map {$_=>1} @cel;

  foreach my $e ( @element ) {
-   $element_sth->execute( $e );
-   my ( $id ) = $element_sth->fetchrow_array();
-   die "no id for $e" unless $id;
-   $element{ $e } = $id;
-   print STDERR "element_id for $e = $id\n" if $verbose;
- }
- 
- foreach my $e ( keys %element ) {
    my $full_dist = Statistics::Descriptive::Full->new();
!   my $prof_dist = Statistics::Descriptive::Full->new();

    my @accessions = ();

    print STDERR "retrieving signal for element $e..." if $verbose;
!   $signal_sth->execute( $element{ $e } );
    while ( my $row = $signal_sth->fetchrow_hashref ) {
      $full_dist->add_data( $row->{ 'signal' } );
!     if ( $sample{ $row->{ 'accession' } } ) {
        $prof_dist->add_data( $row->{ 'signal' } );
      }
--- 53,124 ----
  }

! my $dbh = DBI->connect('dbi:Pg:dbname=chado-celsius;host=torso.genomics.ctrl.ucla.edu');
! $dbh->do('SET search_path TO cel, annot, part_elementresult, public');
! my $element_sth = $dbh->prepare('SELECT element_id FROM element AS e, dbxref AS x WHERE e.dbxref_id = x.dbxref_id AND x.accession = ?');
! my $signal_sth = $dbh->prepare("
! SELECT d.name || ':' || x.accession AS accession, r.signal FROM rma AS r, quantification AS q, cel AS c, cel_dbxref AS cx, dbxref AS x, db AS d WHERE r.element_id = (SELECT element_id FROM element WHERE dbxref_id = (SELECT dbxref_id FROM dbxref WHERE accession = ?)) AND r.quantification_id = q.quantification_id AND c.cel_id = q.acquisition_id AND c.cel_id = cx.cel_id AND cx.dbxref_id = x.dbxref_id AND x.db_id = d.db_id AND d.name = 'SN'
! ");
! my $annotation_sth = $dbh->prepare("SELECT DISTINCT c.name FROM cvterm AS c, dbxref AS x1, dbxref AS x2, cel_dbxref AS cx, acquisition AS q, biomaterialprop AS p WHERE c.cvterm_id = p.type_id AND p.biomaterial_id = q.assay_id AND q.acquisition_id = cx.cel_id AND cx.dbxref_id = x1.dbxref_id AND x1.accession = ? AND c.dbxref_id = x2.dbxref_id AND x2.db_id = (SELECT db_id FROM db WHERE name = ?) AND p.part != 'allenday_tumor' ORDER BY c.name");

  my @cel;
  my @element;
! my %matrix = ();
! 
! if ( $matrix_file ) {
!   print STDERR "reading matrix file..." if $verbose;
!   open(F, $matrix_file) or die "couldn't open matrix file '$matrix_file': $!";
!   my $cel_line = <F>;
!   chomp $cel_line;
!   @cel = split /\t/, $cel_line;
!   shift @cel;
!   while ( my $element_line = <F> ) {
!     chomp $element_line;
!     my ( $e, @signal ) = split /\t/, $element_line;
!     push @element, $e;
!     my $element_dist = Statistics::Descriptive::Full->new();
!     $element_dist->add_data( @signal );
!     $matrix{ $e } = $element_dist;
!   }
!   close(F);
! }
! else {
!   print STDERR "reading cel file..." if $verbose;
!   open(F, $cel_file) or die "couldn't open cel file '$cel_file': $!";
!   @cel = <F>;
!   chomp @cel;
!   close(F);
!   print STDERR "done\n" if $verbose;
! 
!   print STDERR "reading element file..." if $verbose;
!   open(F, $element_file) or die "couldn't open element file '$element_file': $!";
!   @element = <F>;
!   chomp @element;
!   close(F);
!   print STDERR "done\n" if $verbose;
! }

  my %result = ();
  my %percentile = ();
  my %sample = map {$_=>1} @cel;
+ my %profile = ();

  foreach my $e ( @element ) {
    my $full_dist = Statistics::Descriptive::Full->new();
! 
!   my $prof_dist;
!   if ( $matrix_file ) {
!     $prof_dist = $matrix{ $e };
!   }
!   else {
!     $prof_dist = Statistics::Descriptive::Full->new();
!   }

    my @accessions = ();

    print STDERR "retrieving signal for element $e..." if $verbose;
!   $signal_sth->execute( $e );
    while ( my $row = $signal_sth->fetchrow_hashref ) {
      $full_dist->add_data( $row->{ 'signal' } );
!     if ( (!$matrix_file) and $sample{ $row->{ 'accession' } } ) {
        $prof_dist->add_data( $row->{ 'signal' } );
      }
***************
*** 133,140 ****
    }
    $distance = sqrt( $distance );
!   push @n, [$c, $distance];
  }

! foreach my $n ( sort { $a->[1] <=> $b->[1] } @n ) {
!   print $n->[0] ."\t". $n->[1] ."\n";
  }
--- 164,190 ----
    }
    $distance = sqrt( $distance );
! 
! 
!   push @n, [$distance, $c];
  }

! foreach my $n ( sort { $a->[0] <=> $b->[0] } @n ) {
!   my $mpath     = get_annotations( $n->[1], 'MPATH' );
!   my $phenotype = get_annotations( $n->[1], 'MP' );
!   my $cell      = get_annotations( $n->[1], 'CL' );
!   my $anatomy   = get_annotations( $n->[1], 'MA' );
!   my $etc       = get_annotations( $n->[1], 'null' );
!   print join( "\t", ( @{$n}, $anatomy, $mpath, $phenotype, $cell, $etc ) ), "\n";
! }
! 
! sub get_annotations {
!   my $snid = shift;
!   my $dbspace = shift;
!   $snid =~ s/SN://;
!   $annotation_sth->execute($snid, $dbspace);
!   my @a;
!   while ( my ( $name ) = $annotation_sth->fetchrow_array() ) {
!     push @a, $name;
!   }
!   return join ';', @a; 
  }