|
From: <ds...@us...> - 2010-08-25 21:59:14
|
Revision: 5123
http://sashimi.svn.sourceforge.net/sashimi/?rev=5123&view=rev
Author: dshteyn
Date: 2010-08-25 21:59:07 +0000 (Wed, 25 Aug 2010)
Log Message:
-----------
Crux support
Added Paths:
-----------
trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/
trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.cxx
trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.h
trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.cxx
trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.h
Added: trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.cxx
===================================================================
--- trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.cxx (rev 0)
+++ trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.cxx 2010-08-25 21:59:07 UTC (rev 5123)
@@ -0,0 +1,126 @@
+#include "CruxDiscrimFunction.h"
+
+/*
+
+Program : DiscriminantFunction for discr_calc of PeptideProphet
+Author : Andrew Keller <ak...@sy...>
+Date : 11.27.02
+
+
+Copyright (C) 2003 Andrew Keller
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Andrew Keller
+Insitute for Systems Biology
+1441 North 34th St.
+Seattle, WA 98103 USA
+ak...@sy...
+
+*/
+
+
+CruxDiscrimFunction::CruxDiscrimFunction(int charge) : DiscriminantFunction(charge) {
+ CruxDiscrimFunction(charge, False);
+
+}
+
+CruxDiscrimFunction::CruxDiscrimFunction(int charge, Boolean use_xcorr) : DiscriminantFunction(charge) {
+
+ double consts[] = {0.646, -0.959, -1.460, -0.774, -0.598, -0.598, -0.598};
+ double xcorrs[] = {5.49, 8.362, 9.933, 1.465, 3.89, 3.89, 3.89};
+ double deltas[] = {4.643, 7.386, 11.149, 8.704, 7.271, 7.271, 7.271};
+ double ranks[] = {-0.455, -0.194, -0.201, -0.331, -0.377, -0.377, -0.377};
+ double massdiffs[] = {-0.84, -0.314, -0.277, -0.277, -0.84, -0.84, -0.84};
+ int max_pep_lens[] = {100, 15, 25, 50, 100, 100, 100};
+ int num_frags[] = {2, 2, 3, 4, 6, 6, 6};
+
+ const_ = consts[charge_];
+ xcorr_p_wt_ = xcorrs[charge_];
+ delta_wt_ = deltas[charge_];
+ log_rank_wt_ = ranks[charge_];
+ abs_massd_wt_ = massdiffs[charge_];
+ max_pep_len_ = max_pep_lens[charge_];
+ num_frags_ = num_frags[charge_];
+
+ use_xcorr_ = use_xcorr;
+
+ if (use_xcorr_) {
+ delta_wt_ = 0;
+ }
+
+ min_val_ = -100;
+
+}
+
+
+
+Boolean CruxDiscrimFunction::isComputable(SearchResult* result) {
+ return (result != NULL && ((CruxResult*)(result))->xcorr_ > 0.0);
+}
+
+double CruxDiscrimFunction::getDiscriminantScore(SearchResult* result) {
+ if(strcasecmp(result->getName(), "Crux") != 0) {
+ cerr << "illegal type of Crux result: " << result->getName() << endl;
+ exit(1);
+ }
+ CruxResult* cruxresult = (CruxResult*)(result);
+ double tot = const_;
+ tot += xcorr_p_wt_ * getXcorrP(cruxresult->xcorr_, getPepLen(cruxresult->peptide_));
+ if(cruxresult->delta_ < 1.0) // exclude special case of deltaCn equal to 1.00 (i.e. override by setting to 0)
+ tot += delta_wt_ * cruxresult->delta_;
+ //tot += log_rank_wt_ * log((double)cruxresult->rank_);
+ //tot += abs_massd_wt_ * myfabs(cruxresult->massdiff_);
+ Boolean writeMultivariate = False;
+
+ if(writeMultivariate) {
+ ofstream fMulti("multivariate.txt", ios::app);
+ if(! fMulti) {
+ cerr << "cannot append multivariate info for " << cruxresult->spectrum_ << endl;
+ exit(1);
+ }
+ fMulti << cruxresult->spectrum_ << "\t" << getXcorrP(cruxresult->xcorr_, getPepLen(cruxresult->peptide_)) << "\t" << cruxresult->delta_ << "\t" << myfabs(cruxresult->massdiff_) << "\t" << tot << endl;
+ fMulti.close();
+ }
+
+ return tot;
+}
+
+
+int CruxDiscrimFunction::getPepLen(char* pep) {
+ if(pep == NULL)
+ return 0;
+ int start = 0;
+ int end = (int)strlen(pep);
+ if(strlen(pep) > 4 && pep[1] == '.' && pep[strlen(pep)-2] == '.') {
+ start = 2;
+ end = (int)strlen(pep) - 2;
+ }
+ int tot = 0;
+ for(int k = start; k < end; k++)
+ if(pep[k] >= 'A' && pep[k] <= 'Z')
+ tot++;
+
+ return tot;
+}
+
+double CruxDiscrimFunction::getXcorrP(double xcorr, int peplen) {
+ int eff_pep_len = peplen;
+ if(eff_pep_len > max_pep_len_)
+ eff_pep_len = max_pep_len_;
+
+ return (log(xcorr)) / (log((double)(eff_pep_len * num_frags_)));
+
+}
Added: trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.h
===================================================================
--- trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.h (rev 0)
+++ trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimFunction.h 2010-08-25 21:59:07 UTC (rev 5123)
@@ -0,0 +1,74 @@
+#ifndef CRUXDISCR_FUN_H
+#define CRUXDISCR_FUN_H
+
+#include <string.h>
+#include <math.h>
+#include <iostream>
+#include <fstream>
+
+#include "Parsers/Algorithm2XML/SearchResult/CruxResult.h"
+//#include "WindowsCruxResult.h"
+#include "Validation/DiscriminateFunction/DiscriminantFunction.h"
+#include "Parsers/Algorithm2XML/SearchResult/SearchResult.h"
+#include "common/sysdepend.h"
+
+/*
+
+Program : DiscriminantFunction for discr_calc of PeptideProphet
+Author : Andrew Keller <ak...@sy...>
+Date : 11.27.02
+
+
+Copyright (C) 2003 Andrew Keller
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Andrew Keller
+Insitute for Systems Biology
+1441 North 34th St.
+Seattle, WA 98103 USA
+ak...@sy...
+
+*/
+
+
+class CruxDiscrimFunction : public DiscriminantFunction {
+
+ public:
+
+ CruxDiscrimFunction(int charge);
+ CruxDiscrimFunction(int charge, Boolean use_xcorr);
+ Boolean isComputable(SearchResult* result);
+ virtual double getDiscriminantScore(SearchResult* result);
+ double getXcorrP(double xcorr, int peplen);
+ int getPepLen(char* pep);
+
+ double getMinVal() { return min_val_; }
+
+ protected:
+
+ double xcorr_p_wt_;
+ double delta_wt_;
+ double log_rank_wt_;
+ double abs_massd_wt_;
+ double min_val_;
+
+ int max_pep_len_;
+ int num_frags_;
+ Boolean use_xcorr_;
+
+}; // class
+
+#endif
Added: trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.cxx
===================================================================
--- trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.cxx (rev 0)
+++ trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.cxx 2010-08-25 21:59:07 UTC (rev 5123)
@@ -0,0 +1,736 @@
+#include "CruxDiscrimValMixtureDistr.h"
+
+/*
+
+Program : CruxDiscrimValMixtureDistr for PeptideProphet
+Author : Brendan MacLean <bmaclean%at%fhcrc.org>
+Date : 06.20.06
+
+Copyright (C) 2006 Brendan MacLean and Andrew Keller
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Andrew Keller
+Insitute for Systems Biology
+1441 North 34th St.
+Seattle, WA 98103 USA
+ak...@sy...
+
+*/
+
+#include <map>
+#include <string>
+
+using namespace std;
+
+
+// TODO: Figure out a better way to make sure these get linked in.
+//extern void linkCruxNative();
+//extern void linkCruxKscore();
+//extern void linkCruxHscore();
+
+CruxDiscrimValMixtureDistr::CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof) {
+ init(charge, engine, name, tag, maldi, qtof, False, False);
+}
+
+CruxDiscrimValMixtureDistr::CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma) {
+ init(charge, engine, name, tag, maldi, qtof, gamma, False);
+}
+
+CruxDiscrimValMixtureDistr::CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam) {
+ init(charge, engine, name, tag, maldi, qtof, gamma, nonparam);
+}
+CruxDiscrimValMixtureDistr::CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam, Boolean use_xcorr) {
+ init(charge, engine, name, tag, maldi, qtof, gamma, nonparam, use_xcorr);
+}
+
+void CruxDiscrimValMixtureDistr::init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma) {
+ init(charge, engine, name, tag, maldi, qtof, gamma, False);
+}
+
+void CruxDiscrimValMixtureDistr::init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam) {
+ init(charge, engine, name, tag, maldi, qtof, gamma, False, False);
+}
+void CruxDiscrimValMixtureDistr::init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam, Boolean use_xcorr) {
+
+ initializeDistr(charge, name, tag);
+ engine_ = engine;
+ all_negs_ = False;
+ maldi_ = maldi;
+ maxdiff_ = 0.002;
+ nonparam_ = nonparam;
+ use_xcorr_ = use_xcorr;
+
+ gamma_ = gamma;
+ qtof_ = qtof;
+ gammapos_ = False;
+
+ // linkCruxNative();
+ //linkCruxKscore();
+ // linkCruxHscore();
+
+ string algorithmName = "native";
+ const char* start = strchr(engine, '(');
+ if (start != NULL) {
+ algorithmName = start + 1;
+ algorithmName.assign(start + 1, strlen(start) - 2); // Remove '(' and ')'
+ }
+ algorithmName_ = algorithmName;
+
+ // setDiscrimFunction(charge);
+
+ doublevals_ = new Array<double>;
+
+ if (nonparam_) {
+ negdistr_ = new NonParametricDistribution(0.5, True, False);
+ posdistr_ = new NonParametricDistribution(0.5, False, False);
+ }
+ else {
+ if(gammapos_)
+ posdistr_ = new GammaDistribution(maxdiff_);
+ else
+ posdistr_ = new GaussianDistribution(maxdiff_);
+ //posdistr_ = new ExtremeValueDistribution(maxdiff_);
+
+ if (gamma_) {
+ negdistr_ = new GammaDistribution(maxdiff_);
+ } else {
+ negdistr_ = new ExtremeValueDistribution(maxdiff_);
+ }
+ }
+
+ double singlyposmaldiprior[] = {5.0, 0.6};
+ double singlynegmaldiprior[] = {1.0, 1.5, -5.0};
+
+ // single score
+ double singlyposprior[] = {1.87, 1.146};//{0.87, 1.146};
+ double doublyposprior[] = {4.46, 2.08}; //{1.109, 1.767};
+ double triplyposprior[] = {4.01, 2.53}; //{0.411, 1.545};
+ double quadposprior[] = {4.01, 2.53}; //{0.411, 1.545};
+ double pentposprior[] = {4.01, 2.53}; //{0.411, 1.545};
+ double hexposprior[] = {4.01, 2.53}; //{0.411, 1.545};
+ double septposprior[] = {4.01, 2.53}; //{0.411, 1.545};
+
+ double singlynegprior[] = {-0.24, 0.956};
+ double doublynegprior[] = {-0.36, 0.86}; //{-2.468, 0.374};
+ double triplynegprior[] = {-0.2, 0.86}; //{-2.179, 0.363};
+ double quadnegprior[] = {-0.2, 0.86}; //{-2.179, 0.363};
+ double pentnegprior[] = {-0.2, 0.86}; //{-2.179, 0.363};
+ double hexnegprior[] = {-0.2, 0.86}; //{-2.179, 0.363};
+ double septnegprior[] = {-0.2, 0.86}; //{-2.179, 0.363};
+
+ double singlyneggammaprior[] = {4.76, 1.0, -5.0};
+ double doublyneggammaprior[] = {4.64, 0.8, -5.0};
+ double triplyneggammaprior[] = {4.8, 0.8, -5.0};
+ double quadneggammaprior[] = {4.8, 0.8, -5.0};
+ double pentneggammaprior[] = {4.8, 0.8, -5.0};
+ double hexneggammaprior[] = {4.8, 0.8, -5.0};
+ double septneggammaprior[] = {4.8, 0.8, -5.0};
+
+
+
+ double doublyposgamma[] = {4.87, 26.0, -3.34 };
+ double triplyposgamma[] = {3.9, 16.8, -2.95 };
+ double quadposgamma[] = {3.9, 16.8, -2.95 };
+ double pentposgamma[] = {3.9, 16.8, -2.95 };
+ double hexposgamma[] = {3.9, 16.8, -2.95 };
+ double septposgamma[] = {3.9, 16.8, -2.95 };
+
+
+ negmean_ = -1.0;
+ MIN_NUM_PSEUDOS_ = 50;
+ ZERO_SET_ = 100; // ?
+ NUM_DEVS_ = 6;
+ USE_TR_NEG_DISTR_ = False;
+ posinit_ = NULL;
+ neginit_ = NULL;
+
+ if(charge == 0) {
+
+ if(maldi_) {
+ posinit_ = copy(singlyposmaldiprior, 2);
+ }
+ else {
+ posinit_ = copy(singlyposprior, 2);
+ }
+
+ if(maldi_) {
+ neginit_ = copy(singlynegmaldiprior, 2);
+ }
+ else if (gamma_) {
+ neginit_ = copy(singlyneggammaprior, 3);
+ }
+ else {
+ neginit_ = copy(singlynegprior, 2);
+ }
+
+ }
+ else if(charge == 1) {
+ if(gammapos_)
+ posinit_ = copy(doublyposgamma, 3);
+ else
+ posinit_ = copy(doublyposprior, 2);
+ if (gamma_)
+ neginit_ = copy(doublyneggammaprior, 3);
+ else
+ neginit_ = copy(doublynegprior, 2);
+ }
+ else if(charge == 2) {
+ if(gammapos_)
+ posinit_ = copy(triplyposgamma, 3);
+ else
+ posinit_ = copy(triplyposprior, 2);
+ if (gamma_)
+ neginit_ = copy(triplyneggammaprior, 3);
+ else
+ neginit_ = copy(triplynegprior, 2);
+ }
+ else if(charge == 3) {
+ if(gammapos_)
+ posinit_ = copy(quadposgamma, 3);
+ else
+ posinit_ = copy(quadposprior, 2);
+ if (gamma_)
+ neginit_ = copy(quadneggammaprior, 3);
+ else
+ neginit_ = copy(quadnegprior, 2);
+ }
+ else if(charge == 4) {
+ if(gammapos_)
+ posinit_ = copy(pentposgamma, 3);
+ else
+ posinit_ = copy(pentposprior, 2);
+ if (gamma_)
+ neginit_ = copy(pentneggammaprior, 3);
+ else
+ neginit_ = copy(pentnegprior, 2);
+ }
+ else if(charge == 5) {
+ if(gammapos_)
+ posinit_ = copy(hexposgamma, 3);
+ else
+ posinit_ = copy(hexposprior, 2);
+ if (gamma_)
+ neginit_ = copy(hexneggammaprior, 3);
+ else
+ neginit_ = copy(hexnegprior, 2);
+ }
+ else if(charge == 6) {
+ if(gammapos_)
+ posinit_ = copy(septposgamma, 3);
+ else
+ posinit_ = copy(septposprior, 2);
+ if (gamma_)
+ neginit_ = copy(septneggammaprior, 3);
+ else
+ neginit_ = copy(septnegprior, 2);
+ }
+
+ reset();
+
+}
+
+
+void CruxDiscrimValMixtureDistr::setDiscrimFunction(const char* mass_spec_type) {
+ discrim_func_ = (CruxDiscrimFunction*) new CruxDiscrimFunction(charge_, use_xcorr_);
+}
+
+
+Boolean CruxDiscrimValMixtureDistr::update(Array<double>* probs) {
+ Boolean rtn = DiscrimValMixtureDistr::update(probs);
+ negmean_ = ((ContinuousDistribution*)negdistr_)->getMean()+0.5*((ContinuousDistribution*)negdistr_)->getStdev();
+ return rtn;
+}
+
+double CruxDiscrimValMixtureDistr::getPosProb(int index) {
+ if(all_negs_ || (*doublevals_)[index] < negmean_) {
+ return 0.0;
+ }
+ if (nonparam_) {
+ return MixtureDistr::getPosProb(index);
+ }
+ if (gamma_) {
+ if (!((GammaDistribution*)(negdistr_))->aboveMin((*doublevals_)[index])) return (0.0);
+ } else {
+ if(! ((ExtremeValueDistribution*)(negdistr_))->aboveMin((*doublevals_)[index])) return (0.0);
+ }
+
+ return MixtureDistr::getPosProb(index);
+}
+
+double CruxDiscrimValMixtureDistr::getNegProb(int index) {
+ if(all_negs_ || (*doublevals_)[index] < negmean_) {
+ return 1.0;
+ }
+
+ if (nonparam_) {
+ return MixtureDistr::getNegProb(index);
+ }
+
+ if (gamma_) {
+ if (!((GammaDistribution*)(negdistr_))->aboveMin((*doublevals_)[index])) return (1.0);
+ } else {
+ if(! ((ExtremeValueDistribution*)(negdistr_))->aboveMin((*doublevals_)[index])) return (1.0);
+ }
+
+ return MixtureDistr::getNegProb(index);
+}
+
+Boolean CruxDiscrimValMixtureDistr::initializeNegDistribution(NTTMixtureDistr* nttdistr) {
+
+ if (gamma_) {
+ return (initializeNegGammaDistribution(nttdistr));
+ } else {
+ return (initializeNegExtremeValueDistribution(nttdistr));
+ }
+}
+Boolean CruxDiscrimValMixtureDistr::initializeNegDistribution(Array<Boolean>* isdecoy) {
+ if (nonparam_) {
+ isdecoy_ = isdecoy;
+ ((NonParametricDistribution*)(posdistr_))->initWithDecoy(isdecoy, doublevals_);
+ ((NonParametricDistribution*)(negdistr_))->initWithDecoy(isdecoy, doublevals_);
+ return True;
+ }
+ if (gamma_) {
+ return (initializeNegGammaDistribution(isdecoy));
+ } else {
+ return (initializeNegExtremeValueDistribution(isdecoy));
+ }
+}
+
+Boolean CruxDiscrimValMixtureDistr::initializeNegGammaDistribution(NTTMixtureDistr* nttdistr) {
+
+ // never use NTT=0 to initialize negative distribution
+
+ // cerr << "HERE" << std::endl;
+
+ double mean = 0.0;
+ double stdev = 0.0;
+ int tot = 0;
+ double totsq = 0.0;
+
+ // for single score guassian
+ double posmean[] = { 1.87, 4.46, 4.01 , 4.01, 4.01, 4.01, 4.01};
+ double posstdev[] = { 1.146, 2.08, 2.53, 2.53, 2.53, 2.53, 2.53 };
+
+ double negmean[] = {4.76, 4.64, 4.8, 4.8, 4.8, 4.8, 4.8};
+ double negstdev[] = {0.956, 0.86, 0.86, 0.86, 0.86, 0.86, 0.86};
+
+ // values for truncating negative distribution
+ if (charge_ != 0) {
+ ((GammaDistribution*)(negdistr_))->
+ setDistrMinval(((CruxDiscrimFunction*)discrim_func_)->getMinVal());
+ }
+
+ double MAX_SINGLY_NEGMEAN = 1.0;
+
+ double negmean_num_stds[] = {-0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // by charge
+
+ int k;
+
+ double zero = -5.0;
+
+ for(k = 0; k < getNumVals(); k++) {
+ if((*doublevals_)[k] > zero && (*doublevals_)[k] < posmean[charge_] - posstdev[charge_]) {
+ mean += (*doublevals_)[k] - zero;
+ totsq += ((*doublevals_)[k] - zero) * ((*doublevals_)[k] - zero);
+ tot++;
+ }
+ } // next
+
+ if(tot > 0) {
+ mean /= tot;
+ stdev = totsq / tot - mean * mean;
+ }
+ else {
+
+ mean = negmean[charge_];
+ stdev = negstdev[charge_];
+
+ }
+
+ double* newsettings = new double[3];
+ newsettings[0] = mean;
+ newsettings[1] = stdev;
+ newsettings[2] = zero;
+
+ negdistr_->init(newsettings, False);
+ delete [] newsettings;
+
+ negmean_ = zero + mean - negmean_num_stds[charge_] * sqrt(stdev);
+ if(negmean_ > MAX_SINGLY_NEGMEAN) {
+ negmean_ = MAX_SINGLY_NEGMEAN;
+ }
+
+ USE_TR_NEG_DISTR_ = True;
+
+ // cerr << "negmean = " << negmean_ << std::endl;
+ return False; // done
+
+
+}
+
+
+Boolean CruxDiscrimValMixtureDistr::initializeNegExtremeValueDistribution(NTTMixtureDistr* nttdistr) {
+
+ // never use NTT=0 to initialize negative distribution
+
+ // cerr << "HERE" << std::endl;
+
+ double mean = 0.0;
+ double stdev = 0.0;
+ int tot = 0;
+ double totsq = 0.0;
+
+ // for single score guassian
+ double posmean[] = { 1.87, 4.46, 4.01 , 4.01, 4.01, 4.01, 4.01};
+ double posstdev[] = { 1.146, 2.08, 2.53, 2.53, 2.53, 2.53, 2.53 };
+
+ // double negmean[] = {4.76, 4.64, 4.8, 4.8, 4.8};
+ //double negstdev[] = {23, 23, 23, 23, 23};
+
+ double negmean[] = {-0.24, -0.36, -0.2, -0.2, -0.2, -0.2, -0.2};
+ double negstdev[] = {0.956, 0.86, 0.86, 0.86, 0.86, 0.86, 0.86};
+
+
+
+ // values for truncating negative distribution
+ if (charge_ != 0) {
+ ((ExtremeValueDistribution*)(negdistr_))->
+ setDistrMinval(((CruxDiscrimFunction*)discrim_func_)->getMinVal());
+ }
+
+ double MAX_SINGLY_NEGMEAN = 1.0;
+
+ double negmean_num_stds[] = {-0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // by charge
+
+ int k;
+
+ for(k = 0; k < getNumVals(); k++) {
+ if((*doublevals_)[k] < posmean[charge_] - posstdev[charge_]) {
+ mean += (*doublevals_)[k];
+ totsq += ((*doublevals_)[k]) * ((*doublevals_)[k]);
+ tot++;
+ }
+ } // next
+
+ if(tot > 0) {
+ mean /= tot;
+ stdev = totsq / tot - mean * mean;
+ }
+ else {
+
+ mean = negmean[charge_];
+ stdev = negstdev[charge_];
+
+ }
+
+ double* newsettings = new double[3];
+ newsettings[0] = mean;
+ newsettings[1] = stdev;
+
+ negdistr_->init(newsettings);
+ delete [] newsettings;
+
+ negmean_ = mean - negmean_num_stds[charge_] * sqrt(stdev);
+ if(negmean_ > MAX_SINGLY_NEGMEAN) {
+ negmean_ = MAX_SINGLY_NEGMEAN;
+ }
+
+ USE_TR_NEG_DISTR_ = True;
+
+ cerr << "negmean = " << negmean_ << std::endl;
+ return False; // done
+}
+
+Boolean CruxDiscrimValMixtureDistr::initializeNegGammaDistribution(Array<Boolean>* isdecoy) {
+ if (nonparam_) {
+ return True;
+ }
+
+ //cerr << "HERE" << endl;
+
+ double mean = 0.0;
+ double stdev = 0.0;
+ int tot = 0;
+ double totsq = 0.0;
+
+ // for single score guassian
+ double posmean[] = { 1.87, 4.46, 4.01 , 4.01, 4.01, 4.01, 4.01};
+ double posstdev[] = { 1.146, 2.08, 2.53, 2.53, 2.53, 2.53, 2.53 };
+
+ double negmean[] = {4.76, 4.64, 4.8, 4.8, 4.8, 4.8, 4.8};
+ double negstdev[] = {0.956, 0.86, 0.86, 0.86, 0.86, 0.86, 0.86};
+
+ // values for truncating negative distribution
+ if (charge_ != 0) {
+ ((GammaDistribution*)(negdistr_))->
+ setDistrMinval(((CruxDiscrimFunction*)discrim_func_)->getMinVal());
+ }
+
+ double MAX_SINGLY_NEGMEAN = 1.0;
+
+ double negmean_num_stds[] = {-0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // by charge
+
+ int k;
+
+ double zero = -5.0;
+ double pmean = 0.0;
+ double pstdev = 0.0;
+ double ptotsq = 0.0;
+ int ptot = 0;
+ for(k = 0; k < getNumVals(); k++) {
+ if((*isdecoy)[k]) {
+ mean += (*doublevals_)[k]-zero;
+ totsq += ((*doublevals_)[k]-zero) * ((*doublevals_)[k]-zero);
+ tot++;
+ }
+ else {
+ pmean += (*doublevals_)[k];
+ ptotsq += (*doublevals_)[k] * (*doublevals_)[k];
+ ptot++;
+ }
+ } // next
+
+ if(tot > 0) {
+ mean /= tot;
+ stdev = totsq / tot - mean * mean;
+ stdev = sqrt(stdev);
+ }
+ else {
+ mean = negmean[charge_];
+ stdev = negstdev[charge_];
+ USE_TR_NEG_DISTR_ = True;
+
+ }
+
+ if(ptot > 0) {
+ pmean /= ptot;
+ pstdev = ptotsq / ptot - pmean * pmean;
+ pstdev = sqrt(pstdev);
+ }
+ else {
+ pmean = posmean[charge_];
+ pstdev = posstdev[charge_];
+ }
+ // cerr << "pmean: " << pmean << " pstdev:" << pstdev << endl;
+
+ double* newsettings = new double[3];
+ newsettings[0] = mean;
+ newsettings[1] = stdev;
+ newsettings[2] = zero;
+
+ negdistr_->init(newsettings, False);
+ delete [] newsettings;
+
+ negmean_ = zero + mean - negmean_num_stds[charge_] * stdev;
+ if(negmean_ > MAX_SINGLY_NEGMEAN) {
+ negmean_ = MAX_SINGLY_NEGMEAN;
+ }
+
+
+ setPositiveDistr(pmean, pstdev, 1);
+ ((ContinuousDistribution*)negdistr_)->setTot(1);
+ //((ContinuousDistribution*)posdistr_)->setTot(1);
+ while(noDistr()) {
+ pmean += 0.5;
+ setPositiveDistr(pmean, pstdev, 1);
+ }
+
+
+ //cerr << "negmean = " << negmean_ << endl;
+ return False; // done
+
+
+}
+
+
+Boolean CruxDiscrimValMixtureDistr::initializeNegExtremeValueDistribution(Array<Boolean>* isdecoy) {
+ if (nonparam_) {
+ return True;
+ }
+ //assert(nttdistr->getNumVals() == getNumVals());
+ double mean = 0.0;
+ double stdev = 0.0;
+ int tot = 0;
+ double totsq = 0.0;
+
+ // evd for discriminatn score
+ //double posmean[] = { 2.0, 4.102, 4.563 };
+ //double posstdev[] = { 0.4, 1.64, 1.84 };
+
+ // for single score guassian
+ double posmean[] = { 1.87, 4.46, 4.01 , 4.01, 4.01, 4.01, 4.01};
+ double posstdev[] = { 1.146, 2.08, 2.53, 2.53, 2.53, 2.53, 2.53 };
+
+ double negmean[] = {-0.24, -0.36, -0.2, -0.2, -0.2, -0.2, -0.2};
+ double negstdev[] = {0.956, 0.86, 0.86, 0.86, 0.86, 0.86, 0.86};
+
+
+
+ // values for truncating negative distribution
+ if (charge_ != 0) {
+ ((ExtremeValueDistribution*)(negdistr_))->setDistrMinval(((CruxDiscrimFunction*)discrim_func_)->getMinVal());
+ }
+ double zero = -5.0;
+ double pmean = 0.0;
+ double pstdev = 0.0;
+ double ptotsq = 0.0;
+ int ptot = 0;
+
+ double MAX_SINGLY_NEGMEAN = 1.0;
+
+ //double negmean_num_stds[] = {-1.0, 0.5, 0.5}; // by charge
+ //double negmean_num_stds[] = {-0.1, 1.0, 1.0}; // by charge
+ double negmean_num_stds[] = {-0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // by charge
+
+ int k;
+
+ for(k = 0; k < getNumVals(); k++) {
+ //Use only decoy values to init the negative,
+ //the values have to be greater than the minimum value of the distribution
+ if((*isdecoy)[k]) {
+ mean += (*doublevals_)[k];
+ totsq += (*doublevals_)[k] * (*doublevals_)[k];
+ tot++;
+ }
+ else {
+ pmean += (*doublevals_)[k];
+ ptotsq += (*doublevals_)[k] * (*doublevals_)[k];
+ ptot++;
+ }
+ }
+
+ if(tot < MIN_NUM_PSEUDOS_) {
+ tot = 0; // restart
+ mean = 0.0;
+ totsq = 0.0;
+ for(int k = 0; k < getNumVals(); k++) {
+ if((*doublevals_)[k] < posmean[charge_] - posstdev[charge_]) {
+ mean += (*doublevals_)[k];
+ totsq += ((*doublevals_)[k]) * ((*doublevals_)[k]);
+ tot++;
+ }
+ } // next
+
+ if(tot > 0) {
+ mean /= tot;
+ stdev = totsq / tot - mean * mean;
+ }
+ else {
+ mean = negmean[charge_];
+ stdev = negstdev[charge_];
+ }
+
+
+
+ double* newsettings = new double[2];
+ newsettings[0] = mean;
+ newsettings[1] = stdev;
+ newsettings[1] = sqrt(newsettings[1]);
+ negdistr_->init(newsettings);
+ delete [] newsettings;
+
+
+ negmean_ = mean - negmean_num_stds[charge_] * stdev;
+ if(negmean_ > MAX_SINGLY_NEGMEAN) {
+ negmean_ = MAX_SINGLY_NEGMEAN;
+ }
+
+ USE_TR_NEG_DISTR_ = True;
+ return False; // done
+ } // if not enough pseudos
+
+ if(ptot > 0) {
+ pmean /= ptot;
+ pstdev = ptotsq / ptot - pmean * pmean;
+ pstdev = sqrt(pstdev);
+ }
+ else {
+ pmean = posmean[charge_];
+ pstdev = posstdev[charge_];
+ }
+
+ mean /= tot;
+ stdev = (totsq / tot) - mean * mean;
+ stdev = sqrt(stdev);
+
+ negmean_ = mean - negmean_num_stds[charge_] * stdev;
+
+ double newposmean = pmean;
+ double newposstdev = pstdev;
+
+ setNegativeDistr(negmean_, stdev);
+ setPositiveDistr(newposmean, newposstdev,1);
+ ((ContinuousDistribution*)negdistr_)->setTot(1);
+ // ((ContinuousDistribution*)posdistr_)->setTot(1);
+ if (charge_ == 0 && getNumVals() < 500) {
+ return True;
+ }
+ else {
+ while(negmean_ + stdev > newposmean - newposstdev) {
+ newposmean += 0.5;
+ setPositiveDistr(newposmean, newposstdev, 1);
+ }
+ }
+
+ return True;
+}
+
+
+
+
+
+
+void CruxDiscrimValMixtureDistr::setNegativeDistr(double mean, double stdev, double tot) {
+ assert(!gamma_);
+ double* next = new double[2];
+ next[0] = mean;
+ next[1] = stdev;
+ negmean_ = mean;
+ negdistr_->init(next);
+ ((ContinuousDistribution*)negdistr_)->setTot(tot);
+ delete next;
+}
+
+void CruxDiscrimValMixtureDistr::setNegativeDistr(double mean, double stdev) {
+ assert(!gamma_);
+ double* next = new double[2];
+ next[0] = mean;
+ next[1] = stdev;
+ negmean_ = mean;
+ negdistr_->init(next);
+ delete next;
+}
+
+double CruxDiscrimValMixtureDistr::getMinVal() {
+
+ if (algorithmName_ == "h-score") {
+ return (-8.0);
+ } else {
+ return (DiscrimValMixtureDistr::getMinVal());
+ }
+
+}
+
+double CruxDiscrimValMixtureDistr::getMaxVal() {
+
+ if (algorithmName_ == "h-score") {
+ return (12.0);
+ } else {
+ return (DiscrimValMixtureDistr::getMaxVal());
+ }
+
+}
Added: trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.h
===================================================================
--- trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.h (rev 0)
+++ trunk/trans_proteomic_pipeline/src/Validation/DiscriminateFunction/Crux/CruxDiscrimValMixtureDistr.h 2010-08-25 21:59:07 UTC (rev 5123)
@@ -0,0 +1,93 @@
+#ifndef CRUX_DISCRIMV_H
+#define CRUX_DISCRIMV_H
+
+#include <string>
+
+#include "common/sysdepend.h"
+#include "Validation/DiscriminateFunction/DiscrimValMixtureDistr.h"
+#include "Validation/Distribution/ExtremeValueDistribution.h"
+#include "Validation/Distribution/GaussianDistribution.h"
+#include "CruxDiscrimFunction.h"
+
+/*
+
+Program : CruxDiscrimValMixtureDistr for PeptideProphet
+Author : Brendan MacLean <bmaclean%at%fhcrc.org>
+Date : 06.20.06
+
+Copyright (C) 2006 Brendan MacLean and Andrew Keller
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Andrew Keller
+Insitute for Systems Biology
+1441 North 34th St.
+Seattle, WA 98103 USA
+ak...@sy...
+
+*/
+#include "Validation/MixtureModel/OrderedResult.h"
+
+using namespace std;
+
+class CruxDiscrimFunctionFactory;
+
+class CruxDiscrimValMixtureDistr : public DiscrimValMixtureDistr {
+
+ public:
+ CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof);
+ CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma);
+ CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam);
+ CruxDiscrimValMixtureDistr(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam, Boolean use_xcorr);
+ void init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma);
+ void init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam);
+ void init(int charge, const char* engine, const char* name, const char* tag, Boolean maldi, Boolean qtof, Boolean gamma, Boolean nonparam, Boolean use_xcorr);
+ double getPosProb(int index);
+ double getNegProb(int index);
+ Boolean initializeNegDistribution(NTTMixtureDistr* nttdistr);
+ Boolean initializeNegDistribution(Array<Boolean>* isdecoy);
+
+
+ // Boolean noDistr();
+ void setDiscrimFunction(const char* mass_spec_type);
+
+ virtual Boolean update(Array<double>* probs);
+
+ // HENRY
+ virtual double getMinVal();
+ virtual double getMaxVal();
+ // END HENRY
+
+ protected:
+
+ Boolean initializeNegGammaDistribution(NTTMixtureDistr* nttdistr);
+ Boolean initializeNegExtremeValueDistribution(NTTMixtureDistr* nttdistr);
+
+ Boolean initializeNegGammaDistribution(Array<Boolean>* isdecoy);
+ Boolean initializeNegExtremeValueDistribution(Array<Boolean>* isdecoy);
+
+ void setNegativeDistr(double mean, double stdev);
+
+ void setNegativeDistr(double mean, double stdev, double tot);
+ std::string engine_;
+ Boolean gammapos_; // whether pos distributions are gamma
+ string algorithmName_;
+
+ Boolean use_xcorr_; //only use xcorr not deltaCn
+
+};
+
+
+#endif
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|