|
From: <rb...@us...> - 2010-08-24 17:45:38
|
Revision: 5116
http://sashimi.svn.sourceforge.net/sashimi/?rev=5116&view=rev
Author: rbs66
Date: 2010-08-24 17:45:30 +0000 (Tue, 24 Aug 2010)
Log Message:
-----------
Updated to write roc curve data and error data for individual charges, for use with Model Parser updates.
Modified Paths:
--------------
trunk/trans_proteomic_pipeline/src/Validation/MixtureModel/MixtureModel.cxx
trunk/trans_proteomic_pipeline/src/Validation/MixtureModel/MixtureModel.h
Modified: trunk/trans_proteomic_pipeline/src/Validation/MixtureModel/MixtureModel.cxx
===================================================================
--- trunk/trans_proteomic_pipeline/src/Validation/MixtureModel/MixtureModel.cxx 2010-08-23 22:40:54 UTC (rev 5115)
+++ trunk/trans_proteomic_pipeline/src/Validation/MixtureModel/MixtureModel.cxx 2010-08-24 17:45:30 UTC (rev 5116)
@@ -1,9 +1,9 @@
#include "MixtureModel.h"
/*
-Program : MixtureModel for PeptideProphet
-Author : Andrew Keller <ak...@sy...>
-Date : 11.27.02
+Program : MixtureModel for PeptideProphet
+Author : Andrew Keller <ak...@sy...>
+Date : 11.27.02
Primary data object holding all mixture distributions for each precursor ion charge
@@ -25,10 +25,9 @@
Andrew Keller
Insitute for Systems Biology
-1441 North 34th St.
+1441 North 34th St.
Seattle, WA 98103 USA
ak...@sy...
-
*/
#include "common/TPPVersion.h"
@@ -73,12 +72,12 @@
probs_ = new Array<Array<double>*>;
inds_ = new Array<Array<int>*>;
mixtureDistrs_ = new Array<Array<MixtureDistr*>*>;
- gamma_ = True;
+ gamma_ = True;
// HENRY: Don't need to adjust +2/+3 probs for SpectraST
// if (strcasecmp(search_engine, "SPECTRAST") == 0) {
// use_adj_probs_ = False;
- //}
- //else {
+ //}
+ //else {
// use_adj_probs_ = True;
//}
// END HENRY
@@ -117,7 +116,7 @@
strcat(enz_dig_distr_name, " term. [ntt]");
char discr_distr_name[100];
- if(search_engine != NULL)
+ if(search_engine != NULL)
strcpy(discr_distr_name, search_engine);
else {
cerr << "error: null search engine" << endl;
@@ -126,7 +125,7 @@
strcat(discr_distr_name, " discrim score [fval]");
if(! force)
assessPeptideProperties(filename, True, False); // first Boolean for icat, second for glyc
-
+
char nmc_distr_name[100];
strcpy(nmc_distr_name, "no. missed enz. cleavages [nmc]");
setMixtureDistributionNames(discr_distr_name, enz_dig_distr_name, nmc_distr_name); // fval and ntt mixture model names
@@ -150,15 +149,15 @@
Array<MixtureDistr*>* next = new Array<MixtureDistr*>;
// add new distributions to model here
/////////////////////////////////////////////////////////////////////////////////////////////
- if(search_engine != NULL && ! strcasecmp(search_engine, "MASCOT"))
+ if(search_engine != NULL && ! strcasecmp(search_engine, "MASCOT"))
next->insertAtEnd(new MascotDiscrimValMixtureDistr(charge, discrim_name_, "fval", maldi_, qtof_));
- else if(search_engine != NULL && ! strcasecmp(search_engine, "SEQUEST"))
+ else if(search_engine != NULL && ! strcasecmp(search_engine, "SEQUEST"))
next->insertAtEnd(new DiscrimValMixtureDistr(charge, discrim_name_, "fval", gamma_, maldi_, qtof_));
else {
cerr << "cannot accept search engine: " << search_engine << endl;
exit(1);
}
-
+
next->insertAtEnd(new NTTMixtureDistr(charge, ntt_name_, "ntt"));
if(search_engine == NULL || strcasecmp(search_engine, "MASCOT")) //! mascot)
@@ -179,9 +178,9 @@
/////////////////////////////////////////////////////////////////////////////////////////////
mixtureDistrs_->insertAtEnd(next);
}
-
-
+
+
// open stream and read Data, then derive model...
ifstream fin(filename_);
if(! fin) {
@@ -198,14 +197,13 @@
}
-
MixtureModel::MixtureModel(ModelOptions& modelOptions, ScoreOptions& scoreOptions, int max_num_iters) {
common_constructor_init();
max_num_iters_ = max_num_iters;
lastitr_totdiff_ = new Array<double>;
rep_specs_ = NULL;
// parse through modelOptions for these....
-
+
icat_ = modelOptions.icat_ == ICAT_ON; //0; //icat;
glyc_ = modelOptions.glyc_; //0; //glyc;
phospho_ = modelOptions.phospho_; //0; //phospho;
@@ -241,7 +239,7 @@
run_inds_ = new Array<Array<int>*>;
mixtureDistrs_ = new Array<Array<MixtureDistr*>*>;
gamma_ = True;
-
+
// HENRY: Need to store away the search_engine type so as to set the convergence threshold differently for SpectraST
search_engine_ = new char[strlen(modelOptions.engine_)+1];
strcpy(search_engine_, modelOptions.engine_);
@@ -259,7 +257,7 @@
// HENRY: Don't need to adjust +2/+3 probs for SpectraST
// if (strcasecmp(search_engine_, "SPECTRAST") == 0) {
// use_adj_probs_ = False;
- //} else {
+ //} else {
// use_adj_probs_ = True;
//}
// END HENRY
@@ -317,7 +315,7 @@
Array<MixtureDistr*>* next = new Array<MixtureDistr*>;
// add new distributions to model here
-
+
/////////////////////////////////////////////////////////////////////////////////////////////
next_distr = factory_->getDiscrimValMixtureDistr(charge, modelOptions.neg_gamma_, modelOptions.nonparam_, modelOptions.use_expect_);
if(next_distr != NULL)
@@ -331,11 +329,11 @@
no_mass_ = modelOptions.no_mass_;
// HENRY: Precursor m/z distribution
- if (!no_mass_) {
+ if (!no_mass_) {
if (modelOptions.accMass_) {
if (AM_ptr_ == NULL) {
cout << "adding Accurate Mass mixture distr" << endl;
- next_distr = factory_->getMassDifferenceDiscrMixtureDistr(charge);
+ next_distr = factory_->getMassDifferenceDiscrMixtureDistr(charge);
AM_ptr_ = (MassDifferenceDiscrMixtureDistr*)next_distr;
}
else {
@@ -343,23 +341,23 @@
}
}
else {
- next_distr = factory_->getMassDifferenceDiscrMixtureDistr(charge);
+ next_distr = factory_->getMassDifferenceDiscrMixtureDistr(charge);
}
- if(next_distr != NULL)
+ if(next_distr != NULL)
next->insertAtEnd(next_distr);
accMass_ = modelOptions.accMass_;
if (modelOptions.accMass_) {
- next_distr = factory_->getIsoMassDiffDiscrMixtureDistr(charge);
- if(next_distr != NULL)
+ next_distr = factory_->getIsoMassDiffDiscrMixtureDistr(charge);
+ if(next_distr != NULL)
next->insertAtEnd(next_distr);
}
- }
+ }
// END HENRY
// HENRY: modify to determine if we need ICAT for SpectraST
if (!(strcasecmp(search_engine_, "SPECTRAST") == 0)) {
if(icat_) {
- next_distr = factory_->getICATMixtureDistr(charge);
+ next_distr = factory_->getICATMixtureDistr(charge);
if(next_distr != NULL)
next->insertAtEnd(next_distr);
}
@@ -369,19 +367,19 @@
next_distr = factory_->getICATMixtureDistr(charge, spectrast_icat_);
if (next_distr != NULL) {
next->insertAtEnd(next_distr);
- }
+ }
}
- }
+ }
// END HENRY
-
+
if(glyc_) {
- next_distr = factory_->getGlycMixtureDistr(charge);
+ next_distr = factory_->getGlycMixtureDistr(charge);
if(next_distr != NULL)
next->insertAtEnd(next_distr);
}
if(phospho_) {
- next_distr = factory_->getPhosphoMixtureDistr(charge);
+ next_distr = factory_->getPhosphoMixtureDistr(charge);
if(next_distr != NULL)
next->insertAtEnd(next_distr);
}
@@ -403,7 +401,7 @@
//Using same pI distribution for all charges
if (pI_distr == NULL) {
cout << "adding pI mixture distr" << endl;
- next_distr = factory_->getPIMixtureDistr(charge);
+ next_distr = factory_->getPIMixtureDistr(charge);
pI_distr = (KernelDensityPIMixtureDistr*)next_distr;
}
else {
@@ -419,7 +417,7 @@
//Using same RT distribution for all charges
if (RT_distr == NULL) {
cout << "adding Retention Time mixture distr" << endl;
- next_distr = factory_->getRTMixtureDistr(charge);
+ next_distr = factory_->getRTMixtureDistr(charge);
RT_distr = (RTMixtureDistr*)next_distr;
}
else {
@@ -431,14 +429,14 @@
}
-
+
/////////////////////////////////////////////////////////////////////////////////////////////
mixtureDistrs_->insertAtEnd(next);
}
-
+
}
void MixtureModel::common_constructor_init() {
@@ -452,13 +450,13 @@
dec_count_ = 0;
fwd_count_ = 0;
-
+
rep_specs_ = NULL;
mixtureDistrs_=NULL; // mixture distributions by charge
inds_=NULL;
-
+
run_inds_=NULL; //DDS: indeces ordered by run
run_chrg_inds_=NULL;
num_runs_=0;
@@ -467,20 +465,20 @@
max_num_iters_=0;
prev_log_term_=NULL;
maxnumiters_=0;
-
+
priors_=NULL; // for each charge
numspectra_=NULL;
done_=NULL; // iterations to model termination
extradone_=NULL; // iterations to model termination
totitrs_ = NULL;
- verbose_=False; // quiet!!!
+ verbose_=False; // quiet!!
icat_=False; // use icat peptide cys information for probs
glyc_=False; // use peptide N-glycosylation motif for probs
phospho_=False; // use peptide phospho motif for probs
use_adj_probs_=True; // constraint for 2+ and 3+ interpretations of same spectrum
gamma_=False; // use gamma distribution for fval distribution among incorrect search results
num_pairs_=0;
- pairedspecs_=NULL;
+ pairedspecs_=NULL;
adjusted_=NULL;
filename_=NULL;
max_num_iters_=0;
@@ -494,10 +492,10 @@
nmc_name_=NULL;
enzyme_=NULL;
qtof_ = False;
-
+
factory_=NULL;
index_=0;
-
+
pI_ptr_=NULL;
RT_ptr_=NULL;
isoMass_ptr_= new Array<MixtureDistr*>;
@@ -568,17 +566,17 @@
for (int i=0; i<inds_->length(); i++)
delete (*inds_)[i];
delete inds_;
-
+
for (int i=0; i<run_inds_->length(); i++)
delete (*run_inds_)[i];
delete run_inds_; //DDS: indeces ordered by run
-
+
delete[] priors_; // for each charge
delete[] numspectra_;
delete[] done_; // iterations to model termination
delete[] extradone_; // iterations to model termination
delete[] totitrs_; // iterations to model termination
- delete[] pairedspecs_;
+ delete[] pairedspecs_;
delete adjusted_;
delete[] filename_;
delete[] pseudonegs_;
@@ -589,9 +587,9 @@
delete[] negOnly_;
delete[] ignore_;
delete[] prev_log_term_;
-
+
delete factory_;
-
+
//pointer to mixture distribution not allocated delete pI_ptr_;
//pointer to mixture distribution not allocated delete RT_ptr_;
@@ -652,7 +650,7 @@
delete enzyme_;
enzyme_ = new char[strlen(enz)+1];
strcpy(enzyme_, enz);
-
+
// now pass to factory
if(factory_ != NULL) {
for(int ch = 0; ch < MAX_CHARGE; ch++) {
@@ -690,10 +688,10 @@
void MixtureModel::enterData(SearchResult* result) {
-
+
// must first enter spectrum..., but strip off end charge
char* spectrum = new char[strlen(result->spectrum_)+1];
-
+
std::string tmp = result->spectrum_;
boost::algorithm::trim(tmp);
@@ -702,10 +700,10 @@
if (!strcmp("20100422_04_control_07.10165.10165.10",spectrum) ) {
cerr << "DDS: DEBUG" << endl;
}
- //DDS: Doesn't work if charge is more than 2 characters!!!
+ //DDS: Doesn't work if charge is more than 2 characters!!
// HENRY: strip off charge also if assumed_charge is zero (SpectraST case)
- if((strlen(spectrum) > 2) &&
- (spectrum[strlen(spectrum)-2] == '.') &&
+ if((strlen(spectrum) > 2) &&
+ (spectrum[strlen(spectrum)-2] == '.') &&
((spectrum[strlen(spectrum)-1] == result->charge_ + '0') || (spectrum[strlen(spectrum)-1] == '0'))) {
spectrum[strlen(spectrum)-2] = 0;
@@ -724,16 +722,16 @@
// spectrum[strlen(spectrum)-2] = 0;
}
// END DDS
-
+
int charge = result->charge_ - 1;
-
+
if (accMass_) {
(*accmass_inds_)[charge]->insertAtEnd(result->neutral_mass_);
}
// HENRY
if (strcasecmp(search_engine_, "SPECTRAST") == 0) {
- (*libprobs_)[charge]->insertAtEnd(((SpectraSTResult*)result)->libprob_);
+ (*libprobs_)[charge]->insertAtEnd(((SpectraSTResult*)result)->libprob_);
}
// END HENRY
@@ -746,7 +744,7 @@
}
(*(*mixtureDistrs_)[charge])[k]->enter(result);
}
-
+
(*spectra_)[charge]->insertAtEnd(spectrum);
(*probs_)[charge]->insertAtEnd(0.5);
(*inds_)[charge]->insertAtEnd(index_++);
@@ -756,7 +754,7 @@
for (int t = 0; t < result->proteins_->size(); t++) {
// for (int t = 0; t < 1; t++) {
if ((*result->proteins_)[t] == strstr((*result->proteins_)[t], decoy_label_) ||
- (strcasecmp(search_engine_, "SPECTRAST") == 0 &&
+ (strcasecmp(search_engine_, "SPECTRAST") == 0 &&
((SpectraSTResult*)result)->libremark_.find(decoy_label_) == 0)) {
thisdecoy &= true;
}
@@ -765,7 +763,7 @@
break;
}
//protein name starts with DECOY tag, or in SpectraST's case, either protein libremark starts with DECOY tag
-
+
}
@@ -774,14 +772,14 @@
//cerr << result->spectrum_ << endl;
has_decoy_ = true;
dec_count_++;
-
+
}
else {
(*isdecoy_inds_)[charge]->insertAtEnd(False);
fwd_count_++;
}
}
-
+
}
@@ -817,13 +815,13 @@
exit(1);
}
-
+
while(fin >> tag) {
if(strcmp(tag, "pep") == 0) {
fin >> pep;
if(! icat && strstr(pep, "C") != NULL && fval >= min_fval) {
- if(strstr(pep, "C*") != NULL || strstr(pep, "C#") != NULL || strstr(pep, "C@") != NULL)
+ if(strstr(pep, "C*") != NULL || strstr(pep, "C#") != NULL || strstr(pep, "C@") != NULL)
num_mod++;
else
num_unmod++;
@@ -855,8 +853,8 @@
} // while
fin.close();
- double pct_mod = 0;
- double pct_pos = 0;
+ double pct_mod = 0;
+ double pct_pos = 0;
if(num_mod + num_unmod > 0) {
pct_mod = double(num_mod)/(num_mod + num_unmod);
@@ -890,12 +888,12 @@
for(int charge = 0; charge < MAX_CHARGE; charge++) {
tot += (*spectra_)[charge]->size();
for(int k = 0; k < (*mixtureDistrs_)[charge]->size(); k++) {
- if (pI_dist == (*(*mixtureDistrs_)[charge])[k] ||
+ if (pI_dist == (*(*mixtureDistrs_)[charge])[k] ||
strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density calc pI [pI]")==0) {
pI_dist = (*(*mixtureDistrs_)[charge])[k];
continue;
}
- if (RT_dist == (*(*mixtureDistrs_)[charge])[k] ||
+ if (RT_dist == (*(*mixtureDistrs_)[charge])[k] ||
strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density SSRCalc RT [RT]")==0) {
RT_dist = (*(*mixtureDistrs_)[charge])[k];
continue;
@@ -914,9 +912,9 @@
cerr << " read in no data" << endl;
exit(1);
}
-
+
// if (pI_dist != NULL && tot != pI_dist->getNumVals()) {
- // cerr << "have " << tot << " spectra and " << pI_dist->getNumVals()
+ // cerr << "have " << tot << " spectra and " << pI_dist->getNumVals()
// << " values for " << pI_dist->getName() << endl;
// exit(1);
//}
@@ -962,7 +960,7 @@
char* result = strstr(data, maldi_tag);
if(result != NULL) {
- if(strlen(result) > strlen(maldi_tag) && result[strlen(maldi_tag)] == '1')
+ if(strlen(result) > strlen(maldi_tag) && result[strlen(maldi_tag)] == '1')
maldi_ = True;
else maldi_ = False;
maldi_set_ = True;
@@ -976,7 +974,7 @@
char tag[75];
char value[100];
char spec[100];
-
+
// eat first line
if(! fin.getline(data, datasize)) {
cerr << "cannot open " << filename << endl;
@@ -1003,14 +1001,14 @@
}
// initialize fval negative distributions with 0 tryptic termini data, if available
-void MixtureModel::setNegativeDiscrimValDistrs() {
+void MixtureModel::setNegativeDiscrimValDistrs() {
cout << "Initialising statistical models ..." << endl;
-
+
if (use_decoy_) {
cerr << "Found " << dec_count_ << " Decoys, and " << fwd_count_ << " Non-Decoys" << endl;
-
+
if (!has_decoy_) {
- cerr << "WARNING: No decoys with label " << decoy_label_ << " were found in this dataset. "
+ cerr << "WARNING: No decoys with label " << decoy_label_ << " were found in this dataset. "
<< "reverting to fully unsupervised method." << endl;
use_decoy_ = False;
}
@@ -1021,7 +1019,7 @@
pseudonegs_[charge] = ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->
initializeNegDistribution((*isdecoy_inds_)[charge]);
}
- else
+ else
if( !no_ntt_ && getMixtureDistr(discrim_name_, charge) != NULL && getMixtureDistr(ntt_name_, charge) != NULL) {
pseudonegs_[charge] = ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->
initializeNegDistribution(((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge))));
@@ -1050,7 +1048,7 @@
for(int k = 0; k < (*probs_)[charge]->size(); k++) {
tot += (*(*probs_)[charge])[k];
}
-
+
return tot;
}
@@ -1075,7 +1073,7 @@
for(SpectHash::const_iterator z = rep_specs_->begin(); z != rep_specs_->end(); z++) {
for (int i=0; i< (*(*z).second).size(); i++) {
if (charge==(*z->second)[i]->charge_) {
- tot += (*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_];
+ tot += (*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_];
break;
}
}
@@ -1121,7 +1119,7 @@
if(qtof_)
for(int ch = 0; ch < MAX_CHARGE; ch++)
if(getMixtureDistr(discrim_name_, ch) != NULL && getMixtureDistr(ntt_name_, ch) != NULL &&
- getMixtureDistr(discrim_name_, ch)->getPosDistr() != NULL)
+ getMixtureDistr(discrim_name_, ch)->getPosDistr() != NULL)
((DecayContinuousMultimixtureDistr*)(getMixtureDistr(discrim_name_, ch)->getPosDistr()))->initWithNTTs((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ch)));
if ( ! no_neg_init_ ) {
@@ -1130,7 +1128,7 @@
counter_ = 1;
extra_counter_ = 1;
-
+
cout << "Iterations: " ;
while( iterate(counter_)) {
if (verbose_) {
@@ -1182,13 +1180,13 @@
probs->insertAtEnd(new Array<double>);
for (int j=0; j<(*run_inds_)[i]->size(); j++) {
(*probs)[i]->insertAtEnd(getOrderedProb(i, j));
- }
+ }
}
-
-
+
+
pI_ptr_->update(probs, 0.99);
-
+
for (int i=0; i<num_runs_; i++) {
delete (*probs)[i];
}
@@ -1199,8 +1197,8 @@
// if not both satisfactorily modeled
for(int charge = 0; charge < MAX_CHARGE; charge++) {
// if (!ignore_[charge]) {
- if((*spectra_)[charge]->size() > 0 &&
- ((*spectra_)[charge]->size() < min_num_specs_ ||
+ if((*spectra_)[charge]->size() > 0 &&
+ ((*spectra_)[charge]->size() < min_num_specs_ ||
(priors_[charge] == 0.0 && (*spectra_)[charge]->size() > 0))) {
negOnly_[charge] = negOnly(charge);
}
@@ -1219,9 +1217,9 @@
Boolean MixtureModel::iterate(int counter, int charge, Boolean force) {
Boolean verbose = False;
-
+
if(!use_decoy_ && !forcedistr_ &&
- (priors_[charge] == 0.0 ||
+ (priors_[charge] == 0.0 ||
((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->noDistr()) ) {
if(((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->reinitialize()) {
@@ -1237,7 +1235,7 @@
return False;
}
}
-
+
if((*spectra_)[charge]->size() == 0) {
updatePriors(charge);
return False; // done
@@ -1247,7 +1245,7 @@
}
Boolean output = False;
-
+
if(updateProbs(charge, force)) {
output = True;
if(verbose)
@@ -1273,9 +1271,9 @@
if(!no_nmc_ && updateDistr(nmc_name_, charge)) {
output = True;
}
-
+
if(updateProbs(charge, force)) {
- ;
+ ;
}
if(verbose)
@@ -1285,28 +1283,28 @@
for(int k = 2; k < (*mixtureDistrs_)[charge]->size(); k++) {
if(verbose)
cout << "next: " << (*(*mixtureDistrs_)[charge])[k]->getName() << endl;
-
+
//DDS: update pI only once per iter
if ((AM_ptr_ != NULL && (*(*mixtureDistrs_)[charge])[k] == AM_ptr_) ||
- strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density calc pI [pI]")==0 ||
- strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density SSRCalc RT [RT]")==0 ) {
-
+ strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density calc pI [pI]")==0 ||
+ strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density SSRCalc RT [RT]")==0 ) {
+
//||
// strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "isotopic peak mass difference [IsoMassDiff]")==0) {
-
+
// for (int i=0; i<num_runs_; i++) {
// Array<double>* run_probs = new Array<double>;
// probs->insertAtEnd(run_probs);
// for (int j=0; j<(*run_inds_)[i]->size(); j++) {
// (*probs)[i]->insertAtEnd(getOrderedProb(i, j));
- // }
+ // }
//}
-
-
-
+
+
+
//pI_distr = (*(*mixtureDistrs_)[charge])[k];
- // skip update of pI, done only once per iteration
+ // skip update of pI, done only once per iteration
continue;
// }
}
@@ -1322,8 +1320,8 @@
}
}
}
-
+
return output;
}
@@ -1352,7 +1350,7 @@
if (ignore_[charge] || (negOnly_[charge] && !forcedistr_)) {
totitrs_[charge] = counter;
extradone_[charge] = counter;
- }
+ }
else if (counter - done_[charge] < extraitrs_) {
iterate(counter, charge, true);
}
@@ -1392,7 +1390,7 @@
}
alldone_ = !output;
-
+
//DDS: Update pI model
MixtureDistr* pI_ptr = NULL;
if (pI_ && alldone_ && pI_ptr_ == NULL) {
@@ -1409,7 +1407,7 @@
}
pI_ptr_ = pI_ptr;
}
-
+
Array<Array<double>*>* probs = new Array<Array<double>*>;
Array<Array<int>*>* ntts = new Array<Array<int>*>;
for (int i=0; i<num_runs_; i++) {
@@ -1422,10 +1420,10 @@
(*ntts)[i]->insertAtEnd(getOrderedNTT(i, j));
}
}
-
-
+
+
output = pI_ptr_->update(probs, min_pI_prob_, ntts, min_pI_ntt_);
-
+
output = False;
for (int j=0; j<num_runs_; j++) {
@@ -1436,19 +1434,19 @@
delete ntts;
}
-
+
//DDS: Update RT model
MixtureDistr* RT_ptr = NULL;
if (RT_ptr_ != NULL) {
-//DDS: Learn RT coeffs from all peptides together (not by run)
+//DDS: Learn RT coeffs from all peptides together (not by run)
// Array<double>* probs = new Array<double>(index_);
// for (int i=0; i<num_runs_; i++) {
// for (int j=0; j<(*run_inds_)[i]->size(); j++) {
// (*probs)[(*(*run_inds_)[i])[j]] = getOrderedProb(i, j);
// }
// }
-
+
// RT_ptr_->update(probs);
// delete probs;
@@ -1470,7 +1468,7 @@
}
RT_ptr_ = RT_ptr;
}
-
+
Array<Array<double>*>* probs = new Array<Array<double>*>;
Array<Array<int>*>* ntts = new Array<Array<int>*>;
for (int i=0; i<num_runs_; i++) {
@@ -1484,24 +1482,24 @@
}
}
-
+
// Array<double>* probs = new Array<double>(index_);
// Array<int>* ntts = new Array<int>(index_);
-//DDS: Learn RT coeffs from all peptides together (not by run)
+//DDS: Learn RT coeffs from all peptides together (not by run)
//for (int i=0; i<num_runs_; i++) {
// for (int j=0; j<(*run_inds_)[i]->size(); j++) {
// (*probs)[(*(*run_inds_)[i])[j]] = getOrderedProb(i, j);
// (*ntts)[(*(*run_inds_)[i])[j]] = getOrderedNTT(i, j);
// }
//}
-
-
+
+
int code=0;
output = RT_ptr_->update(probs, min_RT_prob_, ntts, min_RT_ntt_, code);
-
+
if (code >= 1) {
//delete RT_ptr_;
//RT_ptr_ = NULL;
@@ -1509,7 +1507,7 @@
}
alldone_ = !output;
-
+
for (int j=0; j<num_runs_; j++) {
delete (*probs)[j];
delete (*ntts)[j];
@@ -1541,7 +1539,7 @@
}
updateProbs(charge, True);
}
-
+
if (done_[charge] < 0) {
done_[charge];
}
@@ -1551,7 +1549,7 @@
}
}
-
+
//DDS: Done pI model update
return output;
@@ -1580,9 +1578,9 @@
newprobs->insertAtEnd(computeProb(charge, k));
if(fabs((*newprobs)[k] - (*(*probs_)[charge])[k]) >= maxdiff) {
output = True;
- }
+ }
}
-
+
if(output || force) {
assert(newprobs->size() == (*probs_)[charge]->size());
delete (*probs_)[charge];
@@ -1600,7 +1598,7 @@
Boolean output = False;
double maxdiff = 0.002;
-
+
// HENRY: Need tighter convergence tolerance for probabilities!!
if (strcasecmp(search_engine_, "SPECTRAST") == 0) {
maxdiff = 0.00001;
@@ -1644,7 +1642,7 @@
// deprecated
Boolean MixtureModel::maldi(char* spec) {
- return False;
+ return False;
}
@@ -1652,7 +1650,7 @@
if(maldi_set_) {
return;
}
- maldi_ = False;
+ maldi_ = False;
// rely on maldi tag from .esi file in future...
maldi_set_ = True;
@@ -1728,7 +1726,7 @@
} // next data member
priors_[charge] = 0.0; // set this to 0
- ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->resetTot(); // set prior to 0
+ ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->resetTot(); // set prior to 0
// if(charge == 1 || charge == 2) {
// use_adj_probs_ = False;
@@ -1761,16 +1759,16 @@
MixtureDistr* isoMass_ptr = NULL;
for(int k = 0; k < (*mixtureDistrs_)[charge]->size(); k++) {
if (strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density calc pI [pI]")==0) {
- pI_ptr = (*(*mixtureDistrs_)[charge])[k];
+ pI_ptr = (*(*mixtureDistrs_)[charge])[k];
}
else if (strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "kernel density SSRCalc RT [RT]")==0) {
- RT_ptr = (*(*mixtureDistrs_)[charge])[k];
+ RT_ptr = (*(*mixtureDistrs_)[charge])[k];
}
else if (strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "isotopic peak mass difference [IsoMassDiff]")==0) {
- isoMass_ptr = (*(*mixtureDistrs_)[charge])[k];
+ isoMass_ptr = (*(*mixtureDistrs_)[charge])[k];
}
else if (accMass_ && strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), "accurate mass diff [massd]")==0 ) {
- massDiff_ptr = (*(*mixtureDistrs_)[charge])[k];
+ massDiff_ptr = (*(*mixtureDistrs_)[charge])[k];
}
else if (no_ntt_ && strcmp((*(*mixtureDistrs_)[charge])[k]->getName(), ntt_name_)==0 ) {
}
@@ -1790,11 +1788,11 @@
if (massDiff_ptr != NULL ) {
tmppos *= massDiff_ptr->getPosProb((*(*inds_)[charge])[index]);
- tmpneg *= massDiff_ptr->getNegProb((*(*inds_)[charge])[index]);
+ tmpneg *= massDiff_ptr->getNegProb((*(*inds_)[charge])[index]);
}
if (isoMass_ptr != NULL) {
tmppos *= isoMass_ptr->getPosProb(index);
- tmpneg *= isoMass_ptr->getNegProb(index);
+ tmpneg *= isoMass_ptr->getNegProb(index);
}
double maxrwd = 10;
@@ -1823,29 +1821,29 @@
return 0.0;
}
- if (pI_ptr_ != NULL) {
+ if (pI_ptr_ != NULL) {
posprob *= pI_ptr->getPosProb((*(*inds_)[charge])[index]);
- negprob *= pI_ptr->getNegProb((*(*inds_)[charge])[index]);
+ negprob *= pI_ptr->getNegProb((*(*inds_)[charge])[index]);
}
- if (RT_ && RT_ptr_ != NULL) {
+ if (RT_ && RT_ptr_ != NULL) {
posprob *= RT_ptr->getPosProb((*(*inds_)[charge])[index]);
- negprob *= RT_ptr->getNegProb((*(*inds_)[charge])[index]);
+ negprob *= RT_ptr->getNegProb((*(*inds_)[charge])[index]);
}
-
-
+
+
posprob *= tmppos;
negprob *= tmpneg;
-
+
if(posprob + negprob <= 0.0) {
return 0.0;
- }
+ }
double prob = (posprob / (posprob + negprob));
if (!isfinite(prob)) { // BSP - catch INF, NAN, -INF
return 0.0;
}
-
+
// HENRY
if (strcasecmp(search_engine_, "SPECTRAST") == 0) {
if (multiply_by_spectrast_lib_probs_) {
@@ -1853,7 +1851,7 @@
}
}
// END HENRY
-
+
return prob;
}
@@ -1869,8 +1867,8 @@
return orig_prob;
}
- double posntt1 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTPosFraction(orig_ntt);
- double negntt1 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTNegFraction(orig_ntt);
+ double posntt1 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTPosFraction(orig_ntt);
+ double negntt1 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTNegFraction(orig_ntt);
if(maldi_) {
posntt1 *= ((DecayContinuousMultimixtureDistr*)(((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->getPosDistr()))->getMixtureProbWithNTT(orig_ntt, index);
negntt1 *= ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->getNegProb(index);
@@ -1882,8 +1880,8 @@
else {
ratio1 = negntt1 / posntt1;
}
- double posntt2 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTPosFraction(ntt);
- double negntt2 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTNegFraction(ntt);
+ double posntt2 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTPosFraction(ntt);
+ double negntt2 = ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTNegFraction(ntt);
if(maldi_) {
posntt2 *= ((DecayContinuousMultimixtureDistr*)(((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->getPosDistr()))->getMixtureProbWithNTT(ntt, index);
negntt2 *= ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->getNegProb(index);
@@ -1956,7 +1954,7 @@
cerr << "cannot read data" << endl;
exit(1);
}
-
+
while(is >> tag) {
if(strcmp(tag, "spec") == 0) {
@@ -1995,14 +1993,14 @@
/*
if (strstr( (*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_],"022008_F8_ETD_1.03159.03159" ) != NULL ) {
cerr <<"DDS: DEBUG"<<endl;
- }
+ }
if (strstr( spectrum, "022008_F8_ETD_1.03159.03159" ) != NULL ) {
cerr <<"DDS: DEBUG"<<endl;
- }
+ }
if (strstr( spectrum, (*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_] ) == NULL ) {
cerr <<"DDS: DEBUG"<<endl;
- }
+ }
*/
if (rep_specs_ == NULL) {
@@ -2040,12 +2038,12 @@
Boolean result = False;
if(z != rep_specs_->end() && use_adj_probs_ && ordered_[index].charge_ >= 0 && ordered_[index].charge_ < MAX_CHARGE) {
-
+
//Boolean result = ! strcmp(spec, pairedspecs_[ordered_[index].index_].name_);
for (int i=0; i < (*(*z).second).size(); i++) {
result = ! strcmp(spectrum, (*(*(*z).second)[i]).name_) ;
if (result) break;
-
+
}
if(modify_spec)
delete [] spec;
@@ -2053,7 +2051,7 @@
}
result = ! strncmp(spectrum, (*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_], strlen((*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_])) && spectrum[strlen(spectrum)-1] != ordered_[index].charge_ - 47;
-
+
if(modify_spec && spec != NULL)
delete [] spec;
return result;
@@ -2070,15 +2068,15 @@
findRepSpecs();
}
SpectHash::const_iterator z = rep_specs_->find((*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_]);
-
+
// if (!strcmp("20100422_01_control_04.07416.07416", (*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_]) ) {
// cerr << "DDS: DEBUG" << endl;
//}
-
+
if (ignore_[ordered_[index].charge_] || (negOnly_[ordered_[index].charge_] && !forcedistr_) ){
return 0;
}
-
+
if( z != rep_specs_->end() && use_adj_probs_ && ordered_[index].charge_ >= 0 && ordered_[index].charge_ < MAX_CHARGE) {
//return pairedspecs_[ordered_[index].index_].prob_;
for (int i=0; i < (*(*z).second).size(); i++) {
@@ -2088,14 +2086,14 @@
}
}
}
-
+
return (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_];
}
double MixtureModel::getOrderedProb(int run_idx, int index) {
-
+
return this->getOrderedProb((*(*run_inds_)[run_idx])[index]);
-}
+}
int MixtureModel::getOrderedNTT(int run_idx, int index) {
int idx = (*(*run_inds_)[run_idx])[index];
@@ -2103,12 +2101,12 @@
}
//double MixtureModel::getOrderedProb(int run_idx, int charge, int index) {
-
+
// return this->getOrderedProb((*(*run_chrg_inds_)[run_idx])[index]);
//}
-
+
Array<Tag*>* MixtureModel::getModelSummaryTags(const char* prog_name, const char* time, const Array<char*>* inputfiles, double minprob, const char* options) {
Array<Tag*>* output = new Array<Tag*>;
Array<Tag*>* next = NULL;
@@ -2134,22 +2132,25 @@
nexttag->setAttributeValue("est_tot_num_correct", text);
output->insertAtEnd(nexttag);
-
// handle inputfiles
for(int k = 0; k < inputfiles->size(); k++) {
nexttag = new Tag("inputfile", True, True);
nexttag->setAttributeValue("name", (*inputfiles)[k]);
output->insertAtEnd(nexttag);
}
+
+ //generate info for charge specific error and roc curves
+ for (int i = 0; i <= MAX_CHARGE; i++)
+ {
+ next = getRocDataPointTags(i); //generate tag group for each charge state, when i = 0 all charge states are taken into account
+ if(next != NULL)
+ {
+ for(int j = 0; j != next->size(); j++)
+ output->insertAtEnd((*next)[j]);
+ delete next;
+ }
+ }
- next = getRocDataPointTags();
- if(next != NULL) {
- for(int i = 0; i < next->size(); i++) {
- output->insertAtEnd((*next)[i]);
- }
- delete next;
- }
-
// here distribution points
next = getDiscrimValDistrTags();
if(next != NULL) {
@@ -2169,7 +2170,7 @@
if(ignore_[charge] || (negOnly_[charge] && !forcedistr_) ) {
if(getNegOnlyCharge(charge) < 0)
sprintf(text, "using training data positive distributions to identify candidates ('-%d') above background ('0')", charge+1);
- else
+ else
sprintf(text, "using %d+ positive distributions to identify candidates ('-%d') above background ('0')", getNegOnlyCharge(charge)+1, charge+1);
}
else if(pseudonegs_[charge]) {
@@ -2247,15 +2248,15 @@
sprintf(next, "(%0.4f,%0.4f,%0.4f)", prob, prob, prob);
}
else {
-
+
// if (!strcmp("20100422_01_control_04.07416.07416", (*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_]) ) {
// cerr << "DDS: DEBUG" << endl;
//}
z = rep_specs_->find((*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_]);
-
+
if(z != rep_specs_->end() && use_adj_probs_ && ordered_[index].charge_ >= 0 && ordered_[index].charge_ < MAX_CHARGE) {
-
+
//Boolean result = ! strcmp(spec, pairedspecs_[ordered_[index].index_].name_);
for (i=0; i < (*(*z).second).size(); i++) {
if ( ordered_[index].charge_ == (*(*(*z).second)[i]).charge_ ) break;
@@ -2265,7 +2266,7 @@
//DDS: ASSUME its always in the hash
//sprintf(next, "(%0.4f,%0.4f,%0.4f)", computeProbWithNTT(pairedspecs_[ordered_[index].index_].charge_, pairedspecs_[ordered_[index].index_].ntt_, pairedspecs_[ordered_[index].index_].prob_, 0, ordered_[index].index_),computeProbWithNTT(pairedspecs_[ordered_[index].index_].charge_, pairedspecs_[ordered_[index].index_].ntt_, pairedspecs_[ordered_[index].index_].prob_, 1, ordered_[index].index_),computeProbWithNTT(pairedspecs_[ordered_[index].index_].charge_, pairedspecs_[ordered_[index].index_].ntt_, pairedspecs_[ordered_[index].index_].prob_, 2, ordered_[index].index_));
-
+
sprintf(next, "(%0.4f,%0.4f,%0.4f)", computeProbWithNTT((*(*(*z).second)[i]).charge_, (*(*(*z).second)[i]).ntt_, (*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_], 0, ordered_[index].index_),computeProbWithNTT((*(*(*z).second)[i]).charge_, (*(*(*z).second)[i]).ntt_, (*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_], 1, ordered_[index].index_),computeProbWithNTT((*(*(*z).second)[i]).charge_, (*(*(*z).second)[i]).ntt_, (*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_], 2, ordered_[index].index_));
}
@@ -2284,7 +2285,7 @@
sprintf(next, "%0.4f", prob);
nexttag->setAttributeValue("probability", next);
}
-
+
if (no_ntt_ && (ignore_[ordered_[index].charge_] || (negOnly_[ ordered_[index].charge_] && !forcedistr_)) ) {
sprintf(next, "(%0.0f,%0.0f,%0.0f)", prob, prob, prob);
}
@@ -2295,11 +2296,11 @@
sprintf(next, "(%0.0f,%0.0f,%0.0f)", computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 0, ordered_[index].index_), computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 1, ordered_[index].index_), computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 2, ordered_[index].index_));
else
sprintf(next, "(%0.4f,%0.4f,%0.4f)", computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 0, ordered_[index].index_), computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 1, ordered_[index].index_), computeProbWithNTT(ordered_[index].charge_, ((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, ordered_[index].charge_)))->getNTTValue(ordered_[index].index_), (*(*probs_)[ordered_[index].charge_])[ordered_[index].index_], 2, ordered_[index].index_));
-
+
nexttag->setAttributeValue("all_ntt_prob", next);
- if(ignore_[ordered_[index].charge_] || (negOnly_[ordered_[index].charge_] && !forcedistr_))
+ if(ignore_[ordered_[index].charge_] || (negOnly_[ordered_[index].charge_] && !forcedistr_))
nexttag->setAttributeValue("analysis", "incomplete");
else if(ordered_[index].charge_ < MAX_CHARGE && ordered_[index].charge_ >= 0 && isAdjusted(ordered_[index].index_))
nexttag->setAttributeValue("analysis", "adjusted");
@@ -2321,7 +2322,7 @@
output->insertAtEnd(nexttag);
return output; // done
-
+
}
int MixtureModel::getOrderedDataIndex(int index) {
@@ -2329,11 +2330,11 @@
SpectHash::const_iterator z = rep_specs_->find((*(*spectra_)[ordered_[index].charge_])[ordered_[index].index_]);
int i;
if(z != rep_specs_->end() && use_adj_probs_ && ordered_[index].charge_ >= 0 && ordered_[index].charge_ < MAX_CHARGE) {
-
+
for (i=0; i < (*(*z).second).size(); i++) {
if (ordered_[index].charge_ == (*(*(*z).second)[i]).charge_ ) return (*(*(*z).second)[i]).data_index_;
}
-
+
}
}
return ordered_[index].index_;
@@ -2346,7 +2347,7 @@
Boolean allchg_idx_incr = False;
Tag* nexttag = new Tag("search_score_summary", True, False);
output->insertAtEnd(nexttag);
- for(int k = 0; k < (*mixtureDistrs_)[charge]->size(); k++)
+ for(int k = 0; k < (*mixtureDistrs_)[charge]->size(); k++)
if (!RT_ && strcmp((*(*mixtureDistrs_)[charge])[k]->getTag(), "RT")==0) {
continue;
}
@@ -2378,24 +2379,24 @@
char* value = (*(*mixtureDistrs_)[charge])[k]->getStringValue(allchg_index_);
nexttag->setAttributeValue("value", value);
delete[] value; // setAttributeValue creates a copy of the value string -- delete the one we created
- output->insertAtEnd(nexttag);
+ output->insertAtEnd(nexttag);
}
else {
char* value = (*(*mixtureDistrs_)[charge])[k]->getStringValue(index);
nexttag->setAttributeValue("value", value);
delete[] value; // setAttributeValue creates a copy of the value string -- delete the one we created
- output->insertAtEnd(nexttag);
+ output->insertAtEnd(nexttag);
}
-
+
}
else {
cout << "charge " << (charge+1) << " index " << index << " vs " << (*(*mixtureDistrs_)[charge])[k]->getNumVals() << endl;
}
-
+
output->insertAtEnd(new Tag("search_score_summary", False, True));
-
+
allchg_index_++;
-
+
return output;
}
@@ -2439,11 +2440,11 @@
if ((num_ordered_ != total) || !ordered_) {
num_ordered_ = total;
delete[] ordered_; // don't leak - may have been set by initResultsInOrder
- ordered_ = new OrderedResult[total];
+ ordered_ = new OrderedResult[total];
}
// first the 1+
// for(int k = 0; k < (*spectra_)[0]->size(); k++) {
- // ordered_[ordered_ind++].init(0, k, (*(*inds_)[0])[k]);
+ // ordered_[ordered_ind++].init(0, k, (*(*inds_)[0])[k]);
//}
//if(use_adj_probs_) {
// for(int k = 0; k < num_pairs_; k++) {
@@ -2468,9 +2469,9 @@
}
}
// for(SpectHash::const_iterator z = rep_specs_->begin(); z != rep_specs_->end(); z++) {
- // int ch = (*z->second)[0]->charge_;
- // int data_idx = (*z->second)[0]->data_index_;
- // int idx = (*z->second)[0]->ind_;
+ // int ch = (*z->second)[0]->charge_;
+ // int data_idx = (*z->second)[0]->data_index_;
+ // int idx = (*z->second)[0]->ind_;
// ordered_[ordered_ind++].init(ch, idx, data_idx);
// }
@@ -2495,7 +2496,7 @@
// first the 1+
//int k;
//for(k = 0; k < (*spectra_)[0]->size(); k++) {
- // ordered[ordered_ind++].init(0, k, (*(*inds_)[0])[k]);
+ // ordered[ordered_ind++].init(0, k, (*(*inds_)[0])[k]);
//}
// and the 4+, 5+, ...
//for(int ch = 3; ch < MAX_CHARGE; ch++) {
@@ -2506,7 +2507,7 @@
//// and the 2+, 3+
//if(use_adj_probs_) {
// for(int k = 0; k < num_pairs_; k++) {
- // ordered[ordered_ind++].init(pairedspecs_[k].charge_, k, pairedspecs_[k].ind_);
+ // ordered[ordered_ind++].init(pairedspecs_[k].charge_, k, pairedspecs_[k].ind_);
// }
//}
//else {
@@ -2518,16 +2519,16 @@
// }
for(SpectHash::const_iterator z = rep_specs_->begin(); z != rep_specs_->end(); z++) {
- int ch = (*z->second)[0]->charge_;
- int data_idx = (*z->second)[0]->data_index_;
- int idx = (*z->second)[0]->ind_;
+ int ch = (*z->second)[0]->charge_;
+ int data_idx = (*z->second)[0]->data_index_;
+ int idx = (*z->second)[0]->ind_;
if (use_decoy_) {
ordered_[ordered_ind++].init(ch, idx, data_idx, (*(*isdecoy_inds_)[ch])[idx]);
}
else {
ordered_[ordered_ind++].init(ch, idx, data_idx);
}
-
+
}
// now order them
@@ -2629,7 +2630,7 @@
}
else {
fout << computeProbWithNTT(
- pairedspecs_[k].charge_,
+ pairedspecs_[k].charge_,
pairedspecs_[k].ntt_,
pairedspecs_[k].prob_, n, pairedspecs_[k].ind_);
}
@@ -2643,7 +2644,7 @@
}
fout << endl;
}
-
+
for(int charge = 3; charge < MAX_CHARGE; charge++) {
for(int k = 0; k < (*spectra_)[charge]->size(); k++) {
fout << (*(*spectra_)[charge])[k] << "." << (charge+1) << "\t";
@@ -2672,7 +2673,7 @@
}
fout << "\n";
}
- }
+ }
}
else {
for(int charge = 1; charge < MAX_CHARGE; charge++) {
@@ -2727,7 +2728,7 @@
Boolean MixtureModel::isAdjusted(int index) {
set<int>::const_iterator itr = adj_set_.find(index);
-
+
if (itr != adj_set_.end()) {
return True;
}
@@ -2754,13 +2755,14 @@
double maxval = mixtureDistr->getMaxVal();
double window = mixtureDistr->getWindow();
- // double minval = -5.0;
+ // double minval = -5.0;
//double maxval = 10.0;
//double window = 0.2;
// END HENRY
int numwins = lrint((maxval - minval)/window);
+
for(int k = 0; k < numwins; k++) {
tag = new Tag("distribution_point", True, True);
sprintf(text, "%0.2f", (k+0.5)*window + minval);
@@ -2772,7 +2774,7 @@
strcpy(text, "0.0");
sprintf(attr, "obs_%d_distr", charge+1);
tag->setAttributeValue(attr, text);
-
+
sprintf(text, "%0.2f", ((DiscrimValMixtureDistr*)(getMixtureDistr(discrim_name_, charge)))->posSlice(k*window + minval, (k+1)*window + minval));
if (strcmp(text, "nan") == 0)
strcpy(text, "0.0");
@@ -2785,8 +2787,6 @@
sprintf(attr, "model_%d_neg_distr", charge+1);
tag->setAttributeValue(attr, text);
- if(charge < 2) {
- }
} // next charge
output->insertAtEnd(tag);
}
@@ -2823,7 +2823,7 @@
fprintf(fout, "\n\nMODEL DISTRIBUTIONS BY NTT VALUE\n");
if(maldi_)
fprintf(fout, "#Fval\tntt0 1+ pos\tntt1 1+ pos\tntt2 1+ pos\n");
- else
+ else
fprintf(fout, "#Fval\tntt0 1+ pos\tntt0 2+ pos\tntt0 3+ pos\tntt1 1+ pos\tntt1 2+ pos\tntt1 3+ pos\tntt2 1+ pos\tntt2 2+ pos\tntt2 3+ pos\n");
for(int k = 0; k < numwins; k++) {
fprintf(fout, "%0.2f", (k+0.5)*window + minval);
@@ -2870,7 +2870,7 @@
char* options = NULL;
int start_options = -1;
for(int k = 0; k < (int) strlen(interact_command); k++) {
- if(k < (int) strlen(interact_command)-3 && interact_command[k] == ' ' &&
+ if(k < (int) strlen(interact_command)-3 && interact_command[k] == ' ' &&
interact_command[k+1] == '-' && interact_command[k+2] == 'O')
start_options = k+3;
if(start_options > -1 && (k == (int) strlen(interact_command)-1 || interact_command[k+1] == ' ')) {
@@ -2912,7 +2912,7 @@
if(ignore_[charge] || (negOnly_[charge] && !forcedistr_) ) {
if(getNegOnlyCharge(charge) < 0)
fprintf(fout, "using training data positive distributions to identify candidates ('-%d') above background ('0')\n", charge+1);
- else
+ else
fprintf(fout, "using %d+ positive distributions to identify candidates ('-%d') above background ('0')\n", getNegOnlyCharge(charge)+1, charge+1);
}
else if(pseudonegs_[charge]) {
@@ -2950,7 +2950,7 @@
next[len] = '.';
next[len + 1] = charge + '1';
next[len + 2] = 0;
- pairedspecs_[total++].init(next,
+ pairedspecs_[total++].init(next,
((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTValue(k), (*(*inds_)[charge])[k], k);
delete [] next;
}
@@ -2984,19 +2984,19 @@
rep_specs_ = new SpectHash();
for(charge = 0; charge < MAX_CHARGE; charge++) {
for(k = 0; k < (*probs_)[charge]->size(); k++) {
-
+
int len=(int)strlen((*(*spectra_)[charge])[k]);
char* next = new char[len+3];
strcpy(next, (*(*spectra_)[charge])[k]);
-
+
next[len] = '.';
//DDS: TODO WILL NOT WORK when charge is multi character (e.g. 10)?
next[len + 1] = charge + '1';
next[len + 2] = 0;
-
+
itr = rep_specs_->find( (*(*spectra_)[charge])[k] );
-
+
if ( itr == rep_specs_->end() ) {
total++;
rep_specs_->insert( std::make_pair( (*(*spectra_)[charge])[k], new Array<Spectrum*>() ) );
@@ -3009,7 +3009,7 @@
if ( use_decoy_ && (*(*isdecoy_inds_)[charge])[k]) {
tprob = 0;
}
- Spectrum* s = new Spectrum(next,
+ Spectrum* s = new Spectrum(next,
((NTTMixtureDistr*)(getMixtureDistr(ntt_name_, charge)))->getNTTValue(k), (*(*inds_)[charge])[k], k);
// if (strstr((*itr).first.c_str(), "022008_F8_ETD_1.03159.03159") != NULL) {
@@ -3018,12 +3018,12 @@
if (itr->second->size() <= charge)
itr->second->insertAtEnd( s );
- else
+ else
delete s;
-
+
delete [] next;
-
-
+
+
}
}
}
@@ -3038,8 +3038,6 @@
num_reps_ = (*spectra_)[z]->size(); // 2+ and 3+
}
-
-
if (rep_specs_ == NULL) {
findRepSpecs();
}
@@ -3071,7 +3069,7 @@
probsum += tprob;
allneg += log(1-tprob);
-
+
}
double denom = exp(allneg);
@@ -3089,9 +3087,9 @@
denom += exp(allneg - log(1-tprob) + log(tprob));
}
-
-
-
+
+
+
double tnumer=0;
for (int i=0; i< (*z->second).size(); i++) {
@@ -3109,12 +3107,12 @@
}
tnumer = allneg - log(1-old_prob) + log(old_prob);
- tnumer = exp(tnumer);
-
+ tnumer = exp(tnumer);
+
new_prob = tnumer / denom;
-
-
+
+
//if (probsum > 1) {
// new_prob = old_prob / probsum;
//}
@@ -3122,7 +3120,7 @@
// new_prob = old_prob;
//}
-
+
if (fabs(new_prob - old_prob) >= 0.05 && new_prob > 0.05) {
(*(*probs_)[(*z->second)[i]->charge_])[(*z->second)[i]->data_index_] = new_prob;
adj_set_.insert((*z->second)[i]->ind_);
@@ -3132,154 +3130,213 @@
}
}
+//adds roc curve and error data to xml file for one or all charges - Richie Stauffer, August 2010
+//charge of 0 returns data from all charges that aren't ignored
+//charge of 1 through 7 (MAX_CHARGE) returns data from that charge state alone (if charge is not ignored)
+//other charge values return null
+Array<Tag*>* MixtureModel::getRocDataPointTags(int charge) //takes charge value, not index
+{
+ if ((charge < 0 || charge > MAX_CHARGE) || (charge && (ignore_[charge - 1] || (negOnly_[charge - 1] && !forcedistr_)))) //make sure charge is in range
+ return NULL;
+
+ bool noData = true;
+
+ Array<Tag*>* output = new Array<Tag*>; //array returned
+ Tag* tag;
+
+ char text[500]; //helps convert numbers into text so they can be passed into methods
+ int total = 0;
-Array<Tag*>* MixtureModel::getRocDataPointTags() {
- Array<Tag*>* output = new Array<Tag*>;
- Tag* tag;
- char text[500];
+ if(charge)
+ {
+ if(!ignore_[charge - 1] && (!negOnly_[charge - 1] || forcedistr_))
+ total += (*probs_)[charge - 1]->size();
+ }
+ else
+ {
+ for(int c = 0; c < MAX_CHARGE; c++)
+ if(!ignore_[c] && (!negOnly_[c] || forcedistr_))
+ total += (*probs_)[c]->size();
+ }
- int total = 0;
- for(int charge = 0; charge < MAX_CHARGE; charge++) {
- if(!ignore_[charge] && (! negOnly_[charge] || forcedistr_)) {
- total += (*probs_)[charge]->size();
- }
- }
-
- double* combinedprobs = new double[total];
- total = 0; // index
+ double* combinedprobs = new double[total];
+ total = 0; //index
- for(int charge = 0; charge < MAX_CHARGE; charge++) {
- if(!ignore_[charge] && (! negOnly_[charge] || forcedistr_)) {
- for(int k = 0; k < (*probs_)[charge]->size(); k++) {
- combinedprobs[total++] = (*(*probs_)[charge])[k];
- }
- }
- } // next
-
- qsort(combinedprobs, total, sizeof(double), (int(*)(const void*, const void*)) comp_nums);
+ if(charge)
+ {
+ if(!ignore_[charge - 1] && (! negOnly_[charge - 1] || forcedistr_))
+ for(int k = 0; k < (*probs_)[charge - 1]->size(); k++)
+ combinedprobs[total++] = (*(*probs_)[charge - 1])[k];
+ }
+ else
+ {
+ for(int c = 0; c < MAX_CHARGE; c++)
+ if(!ignore_[c] && (!negOnly_[c] || forcedistr_))
+ for(int k = 0; k != (*probs_)[c]->size(); k++)
+ combinedprobs[total++] = (*(*probs_)[c])[k];
+ }
- // now sens and error as walk down list
- double thresh[] = {0.99, 0.98, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.0};
- int threshind = 0;
- double correct = 0.0;
- double incorrect = 0.0;
- double totcorrect = 0.0;
- for(int k = 0; k < total; k++) {
- totcorrect += combinedprobs[k];
- }
- double grandTotal = totcorrect;
- if(grandTotal > 0.0) {
- for(int k = 0; k < total; k++) {
- double rnd;
- sprintf(text, "%0.4f", combinedprobs[k]);
- rnd = atof(text);
- if(rnd >= thresh[threshind]) {
- correct += combinedprobs[k];
- incorrect += 1 - combinedprobs[k];
- }
- else if (rnd >= 0) {
- tag = new Tag("roc_data_point", True, True);
- sprintf(text, "%0.3f", thresh[threshind]);
- tag->setAttributeValue("min_prob", text);
- sprintf(text, "%0.4f", correct/totcorrect);
- tag->setAttributeValue("sensitivity", text);
+ qsort(combinedprobs, total, sizeof(double), (int(*)(const void*, const void*)) comp_nums);
- if(correct + incorrect > 0.0) {
+ //now sens and error as walk down list
+ double thresh[] = {0.99, 0.98, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.0};
+ int threshind = 0;
+ double correct = 0.0;
+ double incorrect = 0.0;
+ double totcorrect = 0.0;
+
+ for(int k = 0; k < total; k++)
+ totcorrect += combinedprobs[k];
- sprintf(text, "%0.4f", incorrect/(correct + incorrect));
- tag->setAttributeValue("error", text);
-
+ double grandTotal = totcorrect;
+
+ tag = new Tag("roc_error_data", true, false);
+
+ if(charge)
+ {
+ sprintf(text, "%d", charge);
+ tag->setAttributeValue("charge", text);
+ sprintf(text, "%0.1f", getTotalAdjProb(charge - 1));
+ tag->setAttributeValue("charge_est_correct", text);
}
- else {
- sprintf(text, "%0.0f", 0.0);
- tag->setAttributeValue("error", text);
- }
- sprintf(text, "%0.0f", correct);
- tag->setAttributeValue("num_corr", text);
- sprintf(text, "%0.0f", incorrect);
- tag->setAttributeValue("num_incorr", text);
+ else //charge is 0
+ tag->setAttributeValue("charge", "all");
+
output->insertAtEnd(tag);
- threshind++;
- k--; // assay again using next threshold
- }
- } // next member
+
+ if(grandTotal > 0.0)
+ {
+ if(noData)
+ noData = false;
+
+ for(int k = 0; k < total; k++)
+ {
+ double rnd;
+ sprintf(text, "%0.4f", combinedprobs[k]);
+ rnd = atof(text);
+ if(rnd >= thresh[threshind])
+ {
+ correct += combinedprobs[k];
+ incorrect += 1 - combinedprobs[k];
+ }
+ else if (rnd >= 0)
+ {
+ tag = new Tag("roc_data_point", true, true); //creates point
+ sprintf(text, "%0.3f", thresh[threshind]);
+ tag->setAttributeValue("min_prob", text);
+ sprintf(text, "%0.4f", correct / totcorrect);
+ tag->setAttributeValue("sensitivity", text);
+ if(correct + incorrect > 0.0)
+ {
+ sprintf(text, "%0.4f", incorrect/(correct + incorrect));
+ tag->setAttributeValue("error", text);
+ }
+ else
+ {
+ sprintf(text, "%0.0f", 0.0);
+ tag->setAttributeValue("error", text);
+ }
- tag = new Tag("roc_data_point", True, True);
- sprintf(text, "%0.3f", thresh[threshind]);
- tag->setAttributeValue("min_prob", text);
- sprintf(text, "%0.4f", correct/totcorrect);
- tag->setAttributeValue("sensitivity", text);
- sprintf(text, "%0.4f", incorrect/(correct + incorrect));
- tag->setAttributeValue("error", text);
- sprintf(text, "%0.0f", correct);
- tag->setAttributeValue("num_corr", text);
- sprintf(text, "%0.0f", incorrect);
- tag->setAttributeValue("num_incorr", text);
- output->insertAtEnd(tag);
+ sprintf(text, "%0.0f", correct);
+ tag->setAttributeValue("num_corr", text);
+ sprintf(text, "%0.0f", incorrect);
+ tag->setAttributeValue("num_incorr", text);
+ output->insertAtEnd(tag);
+ threshind++;
+ k--; //assay again using next threshold
+ }
+ } //next member
- } // if have data
- else {
- tag = new Tag("roc_data_point", True, True);
- sprintf(text, "%0.3f", 0.0);
- tag->setAttributeValue("min_prob", text);
- sprintf(text, "%0.4f", 0.0);
- tag->setAttributeValue("sensitivity", text);
- sprintf(text, "%0.4f", 0.0);
- tag->setAttributeValue("error", text);
- sprintf(text, "0.0");
- tag->setAttributeValue("num_corr", text);
- sprintf(text, "0.0");
- tag->setAttributeValue("num_incorr", text);
- output->insertAtEnd(tag);
+ tag = new Tag("roc_data_point", true, true); //creates point
+ sprintf(text, "%0.3f", thresh[threshind]);
+ tag->setAttributeValue("min_prob", text);
+ sprintf(text, "%0.4f", correct/totcorrect);
+ tag->setAttributeValue("sensitivity", text);
+ sprintf(text, "%0.4f", incorrect/(correct + incorrect));
+ tag->setAttributeValue("error", text);
+ sprintf(text, "%0.0f", correct);
+ tag->setAttributeValue("num_corr", text);
+ sprintf(text, "%0.0f", incorrect);
+ tag->setAttributeValue("num_incorr", text);
+ output->insertAtEnd(tag);
+ }
+ else
+ {
+ tag = new Tag("roc_data_point", true, true); //creates point
+ sprintf(text, "%0.3f", 0.0);
+ tag->setAttributeValue("min_prob", text);
+ sprintf(text, "%0.4f", 0.0);
+ tag->setAttributeValue("sensitivity", text);
+ sprintf(text, "%0.4f", 0.0);
+ tag->setAttributeValue("error", text);
+ sprintf(text, "0.0");
+ tag->setAttributeValue("num_corr", text);
+ sprintf(text, "0.0");
+ tag->setAttributeValue("num_incorr", text);
+ output->insertAtEnd(tag);
+ }
+
+ if (noData) //if after looping through everything no real data is found return null
+ return NULL;
+
+ //set error rates
+
+ if(grandTotal > 0.0)
+ {
+ double error_rates[] = {0.0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1, 0.15};
+ correct = 0.0;
+ incorrect = 0.0;
+ threshind = 0;
+ for(int k = 0; k < total; k++)
+ {
+ if(incorrect / (correct + incorrect) > error_rates[threshind])
+ {
+ tag = new Tag("error_point", true, true);
+ if(k == 0)
+ {
+ sprintf(text, "%0.3f", error_rates[threshind]);
+ tag->setAttributeValue("error", text);
+ sprintf(text, "%0.3f", combinedprobs[k]);
+ tag->setAttributeValue("min_prob", text);
+ sprintf(text, "%0.0f", correct + combinedprobs[k]);
+ tag->setAttributeValue("num_corr", text);
+ sprintf(text, "%0.0f", incorrect + 1.0 - combinedprobs[k]);
+ tag->setAttributeValue("num_incorr", text);
+ }
+ else
+ {
+ sprintf(text, "%0.3f", error_rates[threshind]);
+ tag->setAttributeValue("error", text);
+ sprintf(text, "%0.3f", combinedprobs[k - 1]);
+ tag->setAttributeValue("min_prob", text);
+ sprintf(text, "%0.0f", correct + combinedprobs[k]);
+ tag->setAttributeValue("num_corr", text);
+ sprintf(text, "%0.0f", incorrect + 1.0 - combinedprobs[k]);
+ tag->setAttributeValue("num_incorr", text);
+ }
+ output->insertAtEnd(tag);
+ threshind++;
+ k--;
+ }
+ else
+ {
+ correct += combinedprobs[k];
+ incorrect += 1 - combinedprobs[k];
+ }
- }
- if(grandTotal > 0.0) {
- double error_rates[] = {0.0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1, 0.15};
- correct = 0.0;
- incorrect = 0.0;
- threshind = 0;
- for(int k = 0; k < total; k++) {
- if(incorrect / (correct + incorrect) > error_rates[threshind]) {
- tag = new Tag("error_point", True, True);
- if(k == 0) {
- sprintf(text, "%0.3f", error_rates[threshind]);
- tag->setAttributeValue("error", text);
- sprintf(text, "%0.3f", combinedprobs[k]);
- tag->setAttributeValue("min_prob", text);
- sprintf(text, "%0.0f", correct + combinedprobs[k]);
- tag->setAttributeValue("num_corr", text);
- sprintf(text, "%0.0f", incorrect + 1.0 - combinedprobs[k]);
- tag->setAttributeValue("num_incorr", text);
- }
- else {
-
- sprintf(text, "%0.3f", error_rates[threshind]);
- tag->setAttributeValue("error", text);
- sprintf(text, "%0.3f", combinedprobs[k-1]);
- tag->setAttributeValue("min_prob", text);
- sprintf(text, "%0.0f", correct + combinedprobs[k]);
- tag->setAttributeValue("num_corr", text);
- sprintf(text, "%0.0f", incorrect + 1.0 - combinedprobs[k]);
- tag->setAttributeValue("num_incorr", text);
- }
- output->insertAtEnd(tag);
- threshind++;
- k--;
- }
- else {
- correct += combinedprobs[k];
- incorrect += 1 - combinedprobs[k];
- }
- if(threshind == (sizeof(error_rates)/sizeof(double)) - 1) {
- k = total; // done
- }
- } // next ordered prob
- } // if have data
- delete [] combinedprobs;
- return output;
+ if(threshind == (sizeof(error_rates) / sizeof(double)) - 1)
+ k = total; //done
+ } //next ordered prob
+ } //if have data
+
+ //finally insert end of roc_error_data tag
+ output->insertAtEnd(new Tag("roc_error_data", false, true));
+
+ delete[] combinedprobs;
+ return output;
}
// writes out sensitivity and error for various minimum probs
@@ -3294,7 +3351,7 @@
total += (*probs_)[charge]->size();
}
}
-
+
double* combinedprobs = new double[total];
total = 0; // index
int k;
@@ -3315,7 +3372,7 @@
combinedprobs[total++] = (*(*probs_)[charge])[k];
}
}
- } // next
+ } // next
}
else {
for(int charge = 0; charge < MAX_CHARGE; charge++) {
@@ -3324,9 +3381,9 @@
combinedprobs[total++] = (*(*probs_)[charge])[k];
}
}
- } // next
+ } // next
} // next charge
-
+
qsort(combinedprobs, total, sizeof(double), (int(*)(const void*, const void*)) comp_nums);
// now sens and error as walk down list
@@ -3363,13 +3420,13 @@
} // next member
fprintf(fout, "%0.2f\t\t%0.4f\t%0.4f\t\t%0.0f\t\t%0.0f\n", thresh[threshind], correct/totcorrect, incorrect/(correct + incorrect), correct, incorrect);
} // if have data
- else
+ else
fprintf(fout, "%0.2f\t\t%0.4f\t%0.4f\t\t%0.0f\t\t%0.0f\n", 0.0, 0.0, 0.0, 0.0, 0.0);
fprintf(fout, "\n\n");
fprintf(fout, "\n\nJOINT 1+/2+/3+ MIN PROB THRESHOLDS FOR SPECIFIED ERROR RATES\n");
fprintf(fout, "#err\t\tmin_prob\test # corr\test # incorr...
[truncated message content] |