From: <ds...@us...> - 2008-07-31 16:50:50
|
Revision: 3442 http://sashimi.svn.sourceforge.net/sashimi/?rev=3442&view=rev Author: dshteyn Date: 2008-07-31 16:50:46 +0000 (Thu, 31 Jul 2008) Log Message: ----------- New accurate mass, retention time, and pI models utilizing kernel density estimators instead of historgrams. Added Paths: ----------- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.cxx trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.h trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.cxx trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.h trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.cxx trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.h trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensitypIMixtureDistr.cxx Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.cxx =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.cxx (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.cxx 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,135 @@ +#include "AccurateMassDiffDiscrMixtureDistr.h" + +/* + +Program : AccurateMassDiffDiscrMixtureDistr for PeptideProphet +Author : Andrew Keller <ak...@sy...>, + David Shteynberg <2008> +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller 2008 David Shteynberg + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + + +AccurateMassDiffDiscrMixtureDistr::AccurateMassDiffDiscrMixtureDistr(int charge, char* name, char* tag, double range, double window, double orig) : MassDifferenceDiscrMixtureDistr(charge, name, tag, range, window) { + offset_init_ = orig; + offset_ = offset_init_; + vals_ = new Array<double>(); + offset_set_ = False; + update_ctr_ = 0; + min_ctr_ = 2; // min value for offset update + model_ = new KDModel("Accurate Mass Model", 1000); + ready_ = false; + +} + +void AccurateMassDiffDiscrMixtureDistr::enter(SearchResult* result) { + assert(intvals_ != NULL); + double massdiff = result->massdiff_; + int tmp = (int)massdiff; + + if (massdiff - ((double)tmp*_ISOMASSDIS_) > 0.5) { + massdiff = massdiff - ((double)(tmp+1)*_ISOMASSDIS_); + } + else if (massdiff - ((double)tmp*_ISOMASSDIS_) > 0.0) { + massdiff = massdiff - ((double)tmp*_ISOMASSDIS_); + } + else if (massdiff - ((double)tmp*_ISOMASSDIS_) < -0.5) { + massdiff = massdiff - ((double)(tmp-1)*_ISOMASSDIS_); + } + else { + massdiff = massdiff - ((double)tmp*_ISOMASSDIS_); + } + vals_->insertAtEnd(massdiff); // put the double value here + + + model_->insert(0.5, massdiff, -0.2, 0.2); + +} + +int AccurateMassDiffDiscrMixtureDistr::getNumVals() { + return vals_->size(); +} + +Boolean AccurateMassDiffDiscrMixtureDistr::update(Array<double>* probs) { + Boolean output = False; + if (!ready_) { + model_->makeReady(0.01, 0.1); + ready_ = true; + } + else if(model_->update(probs,0, -0.2, 0.2) || output) { + return True; + } + return False; + +} + +Boolean AccurateMassDiffDiscrMixtureDistr::update(Array<Array<double>*>* all_probs) { + Boolean output = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + output = update(probs); + + delete probs; + return output; + +} + +Array<Tag*>* AccurateMassDiffDiscrMixtureDistr::getMixtureDistrTags(char* name) { + Array<Tag*>* output = model_->reportTags(); + return output; + +} + +char* AccurateMassDiffDiscrMixtureDistr::getStringValue(int index) { + char* output = new char[32]; + if(vals_ != NULL) + sprintf(output, "%0.3f", (*vals_)[index]); + else if(intvals_ != NULL) + sprintf(output, "%0d", (*intvals_)[index]); + return output; +} + +double AccurateMassDiffDiscrMixtureDistr::getPosProb(int index) { + if (ready_) + return model_->getPosProb((*vals_)[index]); + + return 0.5; +} + +double AccurateMassDiffDiscrMixtureDistr::getNegProb(int index) { + if (ready_) + return model_->getNegProb((*vals_)[index]); + + return 0.5; +} Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.h =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.h (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/AccurateMassDiffDiscrMixtureDistr.h 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,66 @@ +#ifndef ACCMASS_OR_MASSD_H +#define ACCMASS_OR_MASSD_H + +#include <assert.h> +#include "constants.h" +#include "MassDifferenceDiscrMixtureDistr.h" +#include "KDModel.h" + +/* + +Program : AccurateMassDiffDiscrMixtureDistr for PeptideProphet +Authors : Andrew Keller <ak...@sy...>, David Shteynberg +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller (c) 2008 David Shteynberg + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +David Shteynberg +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + +class AccurateMassDiffDiscrMixtureDistr : public MassDifferenceDiscrMixtureDistr { + + public: + + AccurateMassDiffDiscrMixtureDistr(int charge, char* name, char* tag, double range, double window, double orig); + Boolean update(Array<double>* probs); + Boolean update(Array<Array<double>*>* probs); + void enter(SearchResult* result); + Array<Tag*>* getMixtureDistrTags(char* name); + char* getStringValue(int index); + double getPosProb(int index); + double getNegProb(int index); + int getNumVals(); + protected: + + KDModel* model_; + + double offset_; + double offset_init_; + Array<double>* vals_; + int update_ctr_; + int min_ctr_; + Boolean offset_set_; + bool ready_; +}; + + +#endif Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.cxx =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.cxx (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.cxx 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,205 @@ +#include "KernelDensityPIMixtureDistr.h" + +/* + +Program : KernelDensityPIDiscrMixtureDistr for PeptideProphet +Author : Andrew Keller <ak...@sy...> +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + + +KernelDensityPIMixtureDistr::KernelDensityPIMixtureDistr(int charge, char* name, char* tag, double range, double window, double orig) : pIMixtureDistr(charge, name, tag) { + offset_init_ = orig; + offset_ = offset_init_; + vals_ = new Array<double>; + doublevals_ = vals_; + pIvals_ = new Array<double>; + model_ = new KDModel(name, 500); + update_ctr_ = 0; + min_ctr_ = 2; + ready_ = false; +} + +void KernelDensityPIMixtureDistr::write_pIstats(ostream& out) { + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->write_pIstats(out); + } +} + + +void KernelDensityPIMixtureDistr::recalc_pIstats(Array<Array<double>*>* probs) { + vals_->reserve(vals_->size()); + pIvals_->reserve(pIvals_->size()); + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->recalc_pIstats((*probs)[i]); + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + vals_->insertAtEnd(dval); + model_->insert((*(*probs)[i])[j], dval); + } + } + +} + +void KernelDensityPIMixtureDistr::recalc_pIstats(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt) { + vals_->reserve(vals_->size()); // resets container count to 0, but leaves allocation alone for efficient repopulation + pIvals_->reserve(pIvals_->size()); // resets container count to 0, but leaves allocation alone for efficient repopulation + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->recalc_pIstats((*probs)[i], min_prob, (*ntts)[i], min_ntt); + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + vals_->insertAtEnd(dval); + model_->insert((*(*probs)[i])[j], dval); + } + + } + +} + +void KernelDensityPIMixtureDistr::calc_pIstats() { + vals_->reserve(vals_->size()); + pIvals_->reserve(pIvals_->size()); + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->calc_pIstats(); + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + vals_->insertAtEnd(dval); + model_->insert(0.5, dval); + } + } + +} + +void KernelDensityPIMixtureDistr::enter(int index, char* val) { + double dval = atof(val); + vals_->insertAtEnd(dval); // put the double value here + model_->insert(0.5, dval); +} + + +Boolean KernelDensityPIMixtureDistr::update(Array<Array<double>*>* all_probs) { + Boolean rtn = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + if (!ready_) { + recalc_pIstats(all_probs); + // model_->makeReady(1,1); + model_->makeReady(); + ready_ = true; + } + else if(model_->update(probs,0)) { + rtn = True; + } + + delete probs; + return rtn; + +} +Boolean KernelDensityPIMixtureDistr::update(Array<Array<double>*>* all_probs, double min_prob, Array<Array<int>*>* all_ntts, int min_ntt) { + Boolean rtn = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + if (!ready_) { + recalc_pIstats(all_probs, min_prob, all_ntts, min_ntt); + // model_->makeReady(1,1); + model_->makeReady(); + ready_ = true; + } + else if(model_->update(probs,0)) { + rtn = True; + } + + delete probs; + return rtn; + + +} + + +Array<Tag*>* KernelDensityPIMixtureDistr::getMixtureDistrTags(char* name) { + Array<Tag*>* output = model_->reportTags(); + return output; + +} + + +double KernelDensityPIMixtureDistr::getPosProb(int index) { + if (ready_) + return model_->getPosProb((*vals_)[index]); + + return 0.5; +} + +double KernelDensityPIMixtureDistr::getNegProb(int index) { + if (ready_) + return model_->getNegProb((*vals_)[index]); + + return 0.5; +} + +#define RESLEN 32 +char* KernelDensityPIMixtureDistr::getStringValue(int index) { + char* output = new char[RESLEN+1]; + if(vals_ != NULL) + snprintf(output, RESLEN, "%0.2f", (*vals_)[index]); + + return output; +} + +char* KernelDensityPIMixtureDistr::getStringpIValue(int index) { + char* output = new char[RESLEN+1]; + if(pIvals_ != NULL) + snprintf(output, RESLEN, "%0.2f", (*pIvals_)[index]); + return output; +} + +int KernelDensityPIMixtureDistr::getNumVals() { + if (vals_ != NULL) { + return vals_->size(); + } + return 0; +} Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.h =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.h (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityPIMixtureDistr.h 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,82 @@ +#ifndef KERD_PID_H +#define KERD_PID_H + +#include <assert.h> + +#include "pIMixtureDistr.h" +#include "KDModel.h" + +/* + +Program : KernelDensityPIMixtureDistribution for PeptideProphet +Author : Andrew Keller <ak...@sy...> + David Shteynberg +Date : 11.27.02 + 12.28.08 + +Copyright (C) 2003 Andrew Keller +Copyright (C) 2008 David Shteynberg + + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + +class KernelDensityPIMixtureDistr : public pIMixtureDistr { + + public: + + KernelDensityPIMixtureDistr(int charge, char* name, char* tag, double range, double window, double orig); + int getIntegralValue(double val); + double getMode(double window, Array<double>* probs); + void enter(int index, char* val); + Boolean update(Array<Array<double>*>* probs); + // Boolean update(Array<Array<double>*>* all_probs, double min_prob); + Boolean update(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt); + // void writeDistr(FILE* fout); + //DDS: using parent void enter(SearchResult* result); + Array<Tag*>* getMixtureDistrTags(char* name); + char* getStringValue(int index); + char* getStringpIValue(int index); + Boolean haveDataWithValue(int bin); + double getPosProb(int index); + double getNegProb(int index); + void recalc_pIstats(Array<Array<double>*>* probs); + void recalc_pIstats(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt); + // void recalc_pIstats(Array<Array<double>*>* probs, double min_prob); + void calc_pIstats(); + void write_pIstats(std::ostream& out); + int getNumVals(); + protected: + double offset_; + double offset_init_; + Array<double>* vals_; + Array<double>* pIvals_; + KDModel* model_; + int update_ctr_; + int min_ctr_; + Boolean offset_set_; + bool ready_; + +}; + + +#endif Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.cxx =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.cxx (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.cxx 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,207 @@ +#include "KernelDensityRTMixtureDistr.h" + +/* + +Program : KernelDensityRTMixtureDistribution for PeptideProphet +Author : Andrew Keller <ak...@sy...> + David Shteynberg +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + + +KernelDensityRTMixtureDistr::KernelDensityRTMixtureDistr(int charge, char* name, char* tag) : RTMixtureDistr(charge, name, tag) { + vals_ = new Array<double>; + doublevals_ = vals_; + RTvals_ = new Array<double>; + model_ = new KDModel(name, 500); + update_ctr_ = 0; + min_ctr_ = 2; +} + +void KernelDensityRTMixtureDistr::write_RTstats(ostream& out) { + for (int i = 0; i < run_RT_calc_->size(); i++) { + (*run_RT_calc_)[i]->write_RTstats(out); + } +} + + +void KernelDensityRTMixtureDistr::recalc_RTstats(Array<Array<double>*>* probs) { + vals_->reserve(vals_->size()); + RTvals_->reserve(RTvals_->size()); + for (int i = 0; i < run_RT_calc_->size(); i++) { + (*run_RT_calc_)[i]->recalc_RTstats((*probs)[i]); + for (int j=0; j < (*run_RT_calc_)[i]->RTs_->size(); j++) { + double dval = (*run_RT_calc_)[i]->getRTScore((*(*run_RT_calc_)[i]->RTs_)[j], ((*run_RT_calc_)[i]->scans_)[j]); + + RTvals_->insertAtEnd((*(*run_RT_calc_)[i]->RTs_)[j]); + vals_->insertAtEnd(dval); + model_->insert((*(*probs)[i])[j], dval); + } + } + +} + +void KernelDensityRTMixtureDistr::recalc_RTstats(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt) { + + vals_->reserve(vals_->size()); + RTvals_->reserve(RTvals_->size()); + for (int i = 0; i < run_RT_calc_->size(); i++) { + (*run_RT_calc_)[i]->recalc_RTstats((*probs)[i], min_prob, (*ntts)[i], min_ntt); + for (int j=0; j < (*run_RT_calc_)[i]->RTs_->size(); j++) { + double dval = (*run_RT_calc_)[i]->getRTScore((*(*run_RT_calc_)[i]->RTs_)[j], ((*run_RT_calc_)[i]->scans_)[j]); + RTvals_->insertAtEnd((*(*run_RT_calc_)[i]->RTs_)[j]); + vals_->insertAtEnd(dval); + model_->insert((*(*probs)[i])[j], dval); + } + } +} + +void KernelDensityRTMixtureDistr::calc_RTstats() { + vals_->reserve(vals_->size()); + RTvals_->reserve(RTvals_->size()); + for (int i = 0; i < run_RT_calc_->size(); i++) { + (*run_RT_calc_)[i]->calc_RTstats(); + for (int j=0; j < (*run_RT_calc_)[i]->RTs_->size(); j++) { + double dval = (*run_RT_calc_)[i]->getRTScore((*(*run_RT_calc_)[i]->RTs_)[j], ((*run_RT_calc_)[i]->scans_)[j]); + + RTvals_->insertAtEnd((*(*run_RT_calc_)[i]->RTs_)[j]); + vals_->insertAtEnd(dval); + model_->insert(0.5, dval); + } + } + +} + +void KernelDensityRTMixtureDistr::enter(int index, char* val) { + double dval = atof(val); + vals_->insertAtEnd(dval); // put the double value here + model_->insert(0.5, dval); +} + +Boolean KernelDensityRTMixtureDistr::update(Array<Array<double>*>* all_probs) { + Boolean rtn = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + if (!ready_) { + recalc_RTstats(all_probs); + // model_->makeReady(1,1); + model_->makeReady(); + ready_ = true; + } + else if(model_->update(probs,0)) { + rtn = True; + } + + delete probs; + return rtn; + +} +Boolean KernelDensityRTMixtureDistr::update(Array<Array<double>*>* all_probs, double min_prob, Array<Array<int>*>* all_ntts, int min_ntt) { + Boolean rtn = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + if (!ready_) { + for (int i = 0; i < run_RT_calc_->size(); i++) { + (*run_RT_calc_)[i]->batchSSRCalcHP(); + } + recalc_RTstats(all_probs, min_prob, all_ntts, min_ntt); + // model_->makeReady(1,1); + model_->makeReady(); + ready_ = true; + } + else if(model_->update(probs,0)) { + rtn = True; + } + + delete probs; + return rtn; +} + + + +Array<Tag*>* KernelDensityRTMixtureDistr::getMixtureDistrTags(char* name) { + Array<Tag*>* output = model_->reportTags(); + return output; + +} + + +double KernelDensityRTMixtureDistr::getPosProb(int index) { + if (ready_) + return model_->getPosProb((*vals_)[index]); + + return 0.5; +} + +double KernelDensityRTMixtureDistr::getNegProb(int index) { + if (ready_) + return model_->getNegProb((*vals_)[index]); + + return 0.5; +} + + + +char* KernelDensityRTMixtureDistr::getStringValue(int index) { + char* output = new char[32]; + if (vals_ != NULL) + sprintf(output, "%0.2f", (*vals_)[index]); + + return output; +} + +char* KernelDensityRTMixtureDistr::getStringRTValue(int index) { + char* output = new char[32]; + if(RTvals_ != NULL) + sprintf(output, "%0.2f", (*RTvals_)[index]); + return output; +} + +int KernelDensityRTMixtureDistr::getNumVals() { + if (vals_ != NULL) { + return vals_->size(); + } + return 0; +} + Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.h =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.h (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensityRTMixtureDistr.h 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,78 @@ +#ifndef KERD_RTD_H +#define KERD_RTD_H + +#include <assert.h> + +#include "RTMixtureDistr.h" +#include "KDModel.h" + + +/* + +Program : KernelDensityRTMixtureDistribution for PeptideProphet +Author : Andrew Keller <ak...@sy...> + David Shteynberg +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + +class KernelDensityRTMixtureDistr : public RTMixtureDistr { + + public: + + KernelDensityRTMixtureDistr(int charge, char* name, char* tag); + int getIntegralValue(double val); + double getMode(double window, Array<double>* probs); + void enter(int index, char* val); + Boolean update(Array<Array<double>*>* probs); + // Boolean update(Array<Array<double>*>* all_probs, double min_prob); + Boolean update(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt); + + //DDS: using parent void enter(SearchResult* result); + Array<Tag*>* getMixtureDistrTags(char* name); + char* getStringValue(int index); + char* getStringRTValue(int index); + Boolean haveDataWithValue(int bin); + double getPosProb(int index); + double getNegProb(int index); + void recalc_RTstats(Array<Array<double>*>* probs); + void recalc_RTstats(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt); + // void recalc_RTstats(Array<Array<double>*>* probs, double min_prob); + int getNumVals(); + void calc_RTstats(); + void write_RTstats(std::ostream& out); + protected: + int update_ctr_; + int min_ctr_; + Array<double>* vals_; + Array<double>* RTvals_; + KDModel* model_; + bool ready_; + + +}; + + +#endif Added: trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensitypIMixtureDistr.cxx =================================================================== --- trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensitypIMixtureDistr.cxx (rev 0) +++ trunk/trans_proteomic_pipeline/src/Validation/MixtureDistribution/KernelDensitypIMixtureDistr.cxx 2008-07-31 16:50:46 UTC (rev 3442) @@ -0,0 +1,392 @@ +#include "VariableOffsetpIMixtureDistr.h" + +/* + +Program : VariableOffsetMassDiffDiscrMixtureDistr for PeptideProphet +Author : Andrew Keller <ak...@sy...> +Date : 11.27.02 + +Copyright (C) 2003 Andrew Keller + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Andrew Keller +Insitute for Systems Biology +1441 North 34th St. +Seattle, WA 98103 USA +ak...@sy... + +*/ + + +VariableOffsetpIMixtureDistr::VariableOffsetpIMixtureDistr(int charge, char* name, char* tag, double range, double window, double orig) : pIMixtureDistr(charge, name, tag) { + offset_init_ = orig; + offset_ = offset_init_; + vals_ = new Array<double>; + pIvals_ = new Array<double>; + offset_set_ = False; + update_ctr_ = 0; + min_ctr_ = 2; // min value for offset update + range_ = 28.0; + window_ = 1.0; +} + +Boolean VariableOffsetpIMixtureDistr::haveDataWithValue(int bin) { +for(int k = 0; k < intvals_->size(); k++) + if((*intvals_)[k] == bin) + return True; + return False; +} + +void VariableOffsetpIMixtureDistr::write_pIstats(ostream& out) { + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->write_pIstats(out); + } +} + + +void VariableOffsetpIMixtureDistr::recalc_pIstats(Array<Array<double>*>* probs) { + char val[320]; + intvals_->reserve(intvals_->size()); + vals_->reserve(vals_->size()); + pIvals_->reserve(pIvals_->size()); + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->recalc_pIstats((*probs)[i]); + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + snprintf(val, sizeof(val), "%f", dval); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + this->enter(0, val); + } + } + +} + +void VariableOffsetpIMixtureDistr::recalc_pIstats(Array<Array<double>*>* probs, double min_prob, Array<Array<int>*>* ntts, int min_ntt) { + char val[320]; + intvals_->reserve(intvals_->size()); // resets container count to 0, but leaves allocation alone for efficient repopulation + vals_->reserve(vals_->size()); // resets container count to 0, but leaves allocation alone for efficient repopulation + pIvals_->reserve(pIvals_->size()); // resets container count to 0, but leaves allocation alone for efficient repopulation + for (int i = 0; i < run_pI_calc_->size(); i++) { + if ((*run_pI_calc_)[i]->recalc_pIstats((*probs)[i], min_prob, (*ntts)[i], min_ntt)) { + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + snprintf(val, sizeof(val), "%f", dval); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + this->enter(0, val); + } + } + else { + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = 15; + snprintf(val, sizeof(val), "%f", dval); + this->enter(0, val); + } + } + } + +} + +void VariableOffsetpIMixtureDistr::calc_pIstats() { + char val[320]; + intvals_->reserve(intvals_->size()); + vals_->reserve(vals_->size()); + pIvals_->reserve(pIvals_->size()); + for (int i = 0; i < run_pI_calc_->size(); i++) { + (*run_pI_calc_)[i]->calc_pIstats(); + for (int j=0; j < (*run_pI_calc_)[i]->pIs_.size(); j++) { + double dval = (*run_pI_calc_)[i]->getpIScore((*run_pI_calc_)[i]->pIs_[j], (char*)(*(*run_pI_calc_)[i]->peps_)[j]); + snprintf(val, sizeof(val), "%f", dval); + pIvals_->insertAtEnd((*run_pI_calc_)[i]->pIs_[j]); + this->enter(0, val); + } + } + +} + +void VariableOffsetpIMixtureDistr::enter(int index, char* val) { + assert(intvals_ != NULL); + + // intvals_->insertAtEnd(inttranslate(val)); + intvals_->insertAtEnd(getpIBinNo(atof(val))); + vals_->insertAtEnd(atof(val)); // put the double value here +} + +/* Using Parent class +void VariableOffsetpIMixtureDistr::enter(SearchResult* result) { + assert(intvals_ != NULL); + if(result->pI_ > 0.0) { // already have it + intvals_->insertAtEnd(getIntegralValue(result->pI_)); + vals_->insertAtEnd(result->pI_); + } + else { +#ifdef USE_STD_MODS + double calc_pi = pI_calc_->Peptide_pI(result->peptide_, result->mod_info_); + intvals_->insertAtEnd(getIntegralValue(calc_pi)); + vals_->insertAtEnd(calc_pi); +#endif + } +} +*/ + +int VariableOffsetpIMixtureDistr::getIntegralValue(double val) { + for(int k = 0; k < numbins_; k++) + if(val <= 1.0 + (k * window_) + offset_ + window_ / 2.0) + return k; + return numbins_ - 1; + +} + +Boolean VariableOffsetpIMixtureDistr::update(Array<Array<double>*>* all_probs) { + Boolean output = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + recalc_pIstats(all_probs); + + if(! offset_set_ && update_ctr_ >= min_ctr_) { + //cerr << "here1" << endl; + double new_offset = getMode(0.1, probs); + // want just the non-integral part + //cout << "max: " << new_offset << endl; + new_offset = new_offset - (int)new_offset; // just the fraction + + if(new_offset - offset_ > maxdiff_ || offset_ - new_offset > maxdiff_) { // update + output = True; + offset_ = new_offset; + assert(vals_->size() == intvals_->size()); + for(int k = 0; k < vals_->size(); k++) { + intvals_->replace(k, getpIBinNo((*vals_)[k])); + //intvals_->replace(k, getIntegralValue((*vals_)[k])); + } + } + else { + offset_set_ = True; // done + } + } // if update offset + //cerr << "here2" << endl; + Boolean rtn = False; + if(! offset_set_) + update_ctr_++; + if(DiscreteMixtureDistr::update(probs) || output) { + rtn = True; + } + delete probs; + return rtn; + +} +Boolean VariableOffsetpIMixtureDistr::update(Array<Array<double>*>* all_probs, double min_prob, Array<Array<int>*>* all_ntts, int min_ntt) { + Boolean output = False; + + Array<double>* probs = new Array<double>; + + + for (int i=0; i<all_probs->size(); i++) { + int len = (*all_probs)[i]->size(); + for (int j=0; j<len; j++) { + probs->insertAtEnd((*(*all_probs)[i])[j]); + } + } + + recalc_pIstats(all_probs, min_prob, all_ntts, min_ntt); + + if(! offset_set_ && update_ctr_ >= min_ctr_) { + //cerr << "here1" << endl; + double new_offset = getMode(0.1, probs); + // want just the non-integral part + //cout << "max: " << new_offset << endl; + new_offset = new_offset - (int)new_offset; // just the fraction + + if(new_offset - offset_ > maxdiff_ || offset_ - new_offset > maxdiff_) { // update + output = True; + offset_ = new_offset; + assert(vals_->size() == intvals_->size()); + for(int k = 0; k < vals_->size(); k++) { + int val = getpIBinNo((*vals_)[k]); + intvals_->replace(k, val); + //intvals_->replace(k, getIntegralValue((*vals_)[k])); + } + } + else { + offset_set_ = True; // done + } + } + + Boolean rtn = False; + if(! offset_set_) + update_ctr_++; + + if(DiscreteMixtureDistr::update(probs) || output) { + rtn = True; + } + delete probs; + return rtn; + +} + +double VariableOffsetpIMixtureDistr::getMode(double window, Array<double>* probs) { + double min_tot = 5.0; + int num_windows = (int)(range_ / window); + if(probs == NULL) + return offset_init_; + + Array<double>* win = new Array<double>(); + int k; + for(k = 0; k < num_windows; k++) + win->insertAtEnd(0.0); + double max = 0.0; + int max_ind = -1; + double tot = 0.0; + + assert(probs->size() == vals_->size()); + + + for(k = 0; k < probs->size(); k++) { + if ((*probs)[k] >= 0) { + int next = (int)(((*vals_)[k]) / window); + //cerr << k << " out of " << probs->size() << ": " << next << " vs " << num_windows << endl; + if(next < 0) + next = 0; + if(next >= num_windows) + next = num_windows - 1; + (*win)[next] += (*probs)[k]; + tot += (*probs)[k]; + } + } + + // now find the max + if(tot < min_tot) { + delete win; + return offset_init_; + } + + for(k = 0; k < num_windows; k++) + if((*win)[k] > max) { + max = (*win)[k]; + max_ind = k; + } + delete win; + + if(max_ind == -1) + return offset_init_; + //cerr << update_ctr_ << ": " << max_ind << " " << max << endl; + return ((double)max_ind * window); +} + + +Array<Tag*>* VariableOffsetpIMixtureDistr::getMixtureDistrTags(char* name) { + Array<Tag*>* output = new Array<Tag*>; + Tag* next = new Tag("mixturemodel_distribution", True, False); + char text[500]; + if(name == NULL) + snprintf(text, sizeof(text), "%s (offset: %0.2f)", name_, offset_); + else + snprintf(text, sizeof(text), "%s", name); + next->setAttributeValue("name", text); + output->insertAtEnd(next); + + next = new Tag("posmodel_distribution", True, False); + output->insertAtEnd(next); + int k; + for(k = 0; k < numbins_; k++) + if(haveDataWithValue(k)) { + next = new Tag("parameter", True, True); + next->setAttributeValue("name", (*bindefs_)[k]); + snprintf(text, sizeof(text), "%0.2f", posdistr_->getProb(k)); + next->setAttributeValue("value", text); + output->insertAtEnd(next); + } + output->insertAtEnd(new Tag("posmodel_distribution", False, True)); + + next = new Tag("negmodel_distribution", True, False); + output->insertAtEnd(next); + for(k = 0; k < numbins_; k++) + if(haveDataWithValue(k)) { + next = new Tag("parameter", True, True); + next->setAttributeValue("name", (*bindefs_)[k]); + snprintf(text, sizeof(text), "%0.2f", negdistr_->getProb(k)); + next->setAttributeValue("value", text); + output->insertAtEnd(next); + } + output->insertAtEnd(new Tag("negmodel_distribution", False, True)); + output->insertAtEnd(new Tag("mixturemodel_distribution", False, True)); + return output; + +} + +void VariableOffsetpIMixtureDistr::writeDistr(FILE* fout) { + + fprintf(fout, "%s (offset: %0.2f)\n", name_, offset_); + fprintf(fout, "\tpos: "); + fprintf(fout, "("); +// int next; + int counter = 0; + int k,column_width = 4; + for(k = 0; k < numbins_; k++) { + if(haveDataWithValue(k)) { + counter++; + fprintf(fout, "%0.2f %s", posdistr_->getProb(k), (*bindefs_)[k]); + if(k < numbins_ - 1 && haveDataWithValue(k+1)) { + fprintf(fout, ", "); + } + if(counter%column_width == 0) + fprintf(fout, "\n\t\t"); + } // if have value + } + fprintf(fout, ")\n"); + + fprintf(fout, "\tneg: "); + fprintf(fout, "("); + counter = 0; + for(k = 0; k < numbins_; k++) { + if(haveDataWithValue(k)) { + counter++; + fprintf(fout, "%0.2f %s", negdistr_->getProb(k), (*bindefs_)[k]); + if(k < numbins_ - 1 && haveDataWithValue(k+1)) { + fprintf(fout, ", "); + } + if(counter%column_width == 0) + fprintf(fout, "\n\t\t"); + } // if have value + } + + fprintf(fout, ")\n"); +} + +#define RESLEN 32 +char* VariableOffsetpIMixtureDistr::getStringValue(int index) { + char* output = new char[RESLEN+1]; + if(vals_ != NULL) + snprintf(output, RESLEN, "%0.2f", (*vals_)[index]); + else + if(intvals_ != NULL) + snprintf(output, RESLEN, "%0d", (*intvals_)[index]); + return output; +} + +char* VariableOffsetpIMixtureDistr::getStringpIValue(int index) { + char* output = new char[RESLEN+1]; + if(pIvals_ != NULL) + snprintf(output, RESLEN, "%0.2f", (*pIvals_)[index]); + return output; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |