|
From: <all...@su...> - 2006-03-11 02:56:06
|
Update of /cvsroot/libnelson/Pg/celsius/bin In directory sumo.genetics.ucla.edu:/tmp/cvs-serv3867/bin Modified Files: profile.pl Log Message: updates to use torso. also added "-m" option (untested) to allow specification of full profile, rather than dimension coordinates to create matrix from db dynamically. Index: profile.pl =================================================================== RCS file: /cvsroot/libnelson/Pg/celsius/bin/profile.pl,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** profile.pl 3 Feb 2006 01:28:58 -0000 1.5 --- profile.pl 11 Mar 2006 02:56:02 -0000 1.6 *************** *** 7,10 **** --- 7,11 ---- my $cel_file; my $element_file; + my $matrix_file; my $verbose; my $help; *************** *** 13,21 **** "cel|c=s" => \$cel_file, "element|e=s" => \$element_file, "verbose|v" => \$verbose, "help|h" => \$help ); ! if ( ! $cel_file or ! $element_file or $help ) { print <<"USAGE"; Usage: $0 [-v] -c <file of SN accessions> -e <file of probeset identifiers> --- 14,23 ---- "cel|c=s" => \$cel_file, "element|e=s" => \$element_file, + "matrix|m=s" => \$matrix_file, "verbose|v" => \$verbose, "help|h" => \$help ); ! if ( !($matrix_file or ($cel_file and $element_file)) or $help ) { print <<"USAGE"; Usage: $0 [-v] -c <file of SN accessions> -e <file of probeset identifiers> *************** *** 24,31 **** Celsius warehouse. ! This program takes two inputs: 1) a list of SN identifiers as provisioned by the celsius CEL warehouse 2) a list of Affymetrix probeset identifiers. $0 caculates Euclidean distance in a P-dimensional space where P is the number of probesets in the probeset identifier file. The median values of all SN --- 26,39 ---- Celsius warehouse. ! This program can operate in two modes. ! ! The first mode requires the -c and -e options, taking two inputs: 1) a list of SN identifiers as provisioned by the celsius CEL warehouse 2) a list of Affymetrix probeset identifiers. + The second mode requires the -m options, taking one input: + * a matrix, columns as chip IDs (your own), rows as Affymetrix probeset + identifiers. + $0 caculates Euclidean distance in a P-dimensional space where P is the number of probesets in the probeset identifier file. The median values of all SN *************** *** 45,93 **** } ! my $dbh = DBI->connect('dbi:Pg:dbname=modulus;host=soleus.ctrl.ucla.edu','allenday',''); ! my $element_sth = $dbh->prepare('SELECT element_id FROM element WHERE name = ?'); ! my $signal_sth = $dbh->prepare("SELECT cel.db || ':' || cel.accession AS accession, element.name, result.signal FROM cel, element, result WHERE cel.cel_id = result.cel_id AND element.element_id = result.element_id AND element.element_id = ?"); my @cel; - print STDERR "reading cel file..." if $verbose; - open(F, $cel_file); - my @cel = <F>; - chomp @cel; - close(F); - print STDERR "done\n" if $verbose; - my @element; ! print STDERR "reading element file..." if $verbose; ! open(F, $element_file); ! my @element = <F>; ! chomp @element; ! close(F); ! print STDERR "done\n" if $verbose; my %result = (); my %percentile = (); - my %profile = (); - my %element = (); my %sample = map {$_=>1} @cel; foreach my $e ( @element ) { - $element_sth->execute( $e ); - my ( $id ) = $element_sth->fetchrow_array(); - die "no id for $e" unless $id; - $element{ $e } = $id; - print STDERR "element_id for $e = $id\n" if $verbose; - } - - foreach my $e ( keys %element ) { my $full_dist = Statistics::Descriptive::Full->new(); ! my $prof_dist = Statistics::Descriptive::Full->new(); my @accessions = (); print STDERR "retrieving signal for element $e..." if $verbose; ! $signal_sth->execute( $element{ $e } ); while ( my $row = $signal_sth->fetchrow_hashref ) { $full_dist->add_data( $row->{ 'signal' } ); ! if ( $sample{ $row->{ 'accession' } } ) { $prof_dist->add_data( $row->{ 'signal' } ); } --- 53,124 ---- } ! my $dbh = DBI->connect('dbi:Pg:dbname=chado-celsius;host=torso.genomics.ctrl.ucla.edu'); ! $dbh->do('SET search_path TO cel, annot, part_elementresult, public'); ! my $element_sth = $dbh->prepare('SELECT element_id FROM element AS e, dbxref AS x WHERE e.dbxref_id = x.dbxref_id AND x.accession = ?'); ! my $signal_sth = $dbh->prepare(" ! SELECT d.name || ':' || x.accession AS accession, r.signal FROM rma AS r, quantification AS q, cel AS c, cel_dbxref AS cx, dbxref AS x, db AS d WHERE r.element_id = (SELECT element_id FROM element WHERE dbxref_id = (SELECT dbxref_id FROM dbxref WHERE accession = ?)) AND r.quantification_id = q.quantification_id AND c.cel_id = q.acquisition_id AND c.cel_id = cx.cel_id AND cx.dbxref_id = x.dbxref_id AND x.db_id = d.db_id AND d.name = 'SN' ! "); ! my $annotation_sth = $dbh->prepare("SELECT DISTINCT c.name FROM cvterm AS c, dbxref AS x1, dbxref AS x2, cel_dbxref AS cx, acquisition AS q, biomaterialprop AS p WHERE c.cvterm_id = p.type_id AND p.biomaterial_id = q.assay_id AND q.acquisition_id = cx.cel_id AND cx.dbxref_id = x1.dbxref_id AND x1.accession = ? AND c.dbxref_id = x2.dbxref_id AND x2.db_id = (SELECT db_id FROM db WHERE name = ?) AND p.part != 'allenday_tumor' ORDER BY c.name"); my @cel; my @element; ! my %matrix = (); ! ! if ( $matrix_file ) { ! print STDERR "reading matrix file..." if $verbose; ! open(F, $matrix_file) or die "couldn't open matrix file '$matrix_file': $!"; ! my $cel_line = <F>; ! chomp $cel_line; ! @cel = split /\t/, $cel_line; ! shift @cel; ! while ( my $element_line = <F> ) { ! chomp $element_line; ! my ( $e, @signal ) = split /\t/, $element_line; ! push @element, $e; ! my $element_dist = Statistics::Descriptive::Full->new(); ! $element_dist->add_data( @signal ); ! $matrix{ $e } = $element_dist; ! } ! close(F); ! } ! else { ! print STDERR "reading cel file..." if $verbose; ! open(F, $cel_file) or die "couldn't open cel file '$cel_file': $!"; ! @cel = <F>; ! chomp @cel; ! close(F); ! print STDERR "done\n" if $verbose; ! ! print STDERR "reading element file..." if $verbose; ! open(F, $element_file) or die "couldn't open element file '$element_file': $!"; ! @element = <F>; ! chomp @element; ! close(F); ! print STDERR "done\n" if $verbose; ! } my %result = (); my %percentile = (); my %sample = map {$_=>1} @cel; + my %profile = (); foreach my $e ( @element ) { my $full_dist = Statistics::Descriptive::Full->new(); ! ! my $prof_dist; ! if ( $matrix_file ) { ! $prof_dist = $matrix{ $e }; ! } ! else { ! $prof_dist = Statistics::Descriptive::Full->new(); ! } my @accessions = (); print STDERR "retrieving signal for element $e..." if $verbose; ! $signal_sth->execute( $e ); while ( my $row = $signal_sth->fetchrow_hashref ) { $full_dist->add_data( $row->{ 'signal' } ); ! if ( (!$matrix_file) and $sample{ $row->{ 'accession' } } ) { $prof_dist->add_data( $row->{ 'signal' } ); } *************** *** 133,140 **** } $distance = sqrt( $distance ); ! push @n, [$c, $distance]; } ! foreach my $n ( sort { $a->[1] <=> $b->[1] } @n ) { ! print $n->[0] ."\t". $n->[1] ."\n"; } --- 164,190 ---- } $distance = sqrt( $distance ); ! ! ! push @n, [$distance, $c]; } ! foreach my $n ( sort { $a->[0] <=> $b->[0] } @n ) { ! my $mpath = get_annotations( $n->[1], 'MPATH' ); ! my $phenotype = get_annotations( $n->[1], 'MP' ); ! my $cell = get_annotations( $n->[1], 'CL' ); ! my $anatomy = get_annotations( $n->[1], 'MA' ); ! my $etc = get_annotations( $n->[1], 'null' ); ! print join( "\t", ( @{$n}, $anatomy, $mpath, $phenotype, $cell, $etc ) ), "\n"; ! } ! ! sub get_annotations { ! my $snid = shift; ! my $dbspace = shift; ! $snid =~ s/SN://; ! $annotation_sth->execute($snid, $dbspace); ! my @a; ! while ( my ( $name ) = $annotation_sth->fetchrow_array() ) { ! push @a, $name; ! } ! return join ';', @a; } |