[Denovoassembler-devel] RE : kmers matrix
Ray -- Parallel genome assemblies for parallel DNA sequencing
Brought to you by:
sebhtml
From: Sébastien B. <seb...@ul...> - 2014-02-19 19:13:35
|
Review: On 19 février 2014 08:41, Maxime Deraspe [ma...@de...] wrote: > À : Sébastien Boisvert > Objet : kmers matrix > > diff --git a/code/Surveyor/MatrixOwner.cpp b/code/Surveyor/MatrixOwner.cpp > index ffaae00..13911ef 100644 > --- a/code/Surveyor/MatrixOwner.cpp > +++ b/code/Surveyor/MatrixOwner.cpp > @@ -36,6 +36,8 @@ MatrixOwner::MatrixOwner() { > > m_receivedPayloads = 0; > > + matricesIsReady = false; matrices is in the plural form, you should use *are* > + > } > > MatrixOwner::~MatrixOwner() { > @@ -65,7 +67,6 @@ void MatrixOwner::receive(Message & message) { > assert(m_parameters != NULL); > assert(m_sampleNames != NULL); > #endif > - > m_mother = source; > > } else if(tag == PUSH_PAYLOAD) { > @@ -100,8 +101,8 @@ void MatrixOwner::receive(Message & message) { > Message response; > response.setTag(PUSH_PAYLOAD_OK); > send(source, response); > - > - } else if(tag == PUSH_PAYLOAD_END) { > + } > + else if(tag == PUSH_PAYLOAD_END) { > > m_completedStoreActors++; > > @@ -152,9 +153,12 @@ void MatrixOwner::receive(Message & message) { > > // tell Mother that the matrix is ready now. > > - Message coolMessage; > - coolMessage.setTag(MATRIX_IS_READY); > - send(m_mother, coolMessage); > + if(matricesIsReady){ > + Message coolMessage; > + coolMessage.setTag(MATRIX_IS_READY); > + send(m_mother, coolMessage); > + } > + matricesIsReady = true; > > > // clear matrices > @@ -162,7 +166,79 @@ void MatrixOwner::receive(Message & message) { > m_localGramMatrix.clear(); > m_kernelDistanceMatrix.clear(); > } > + } > + else if(tag == PUSH_KMERS_SAMPLES) { change to PUSH_KMER_SAMPLES > + > + char * kmer; > + char * samples_vector; use lower camel case style (sampleVector). Also, this is an uninitialized pointer. Using this as is will lead to random behavior. 
> + // vector<bool> samples_vector; > + > + int offset = 0; > + > + memcpy(&kmer, buffer + offset, sizeof(kmer)); > + offset += sizeof(kmer); > + memcpy(&samples_vector, buffer + offset, > sizeof(samples_vector)); You are memcpy'ing into an uninitialized pointer. > + offset += sizeof(samples_vector); This is always 8 bytes, I think, on 64-bit systems, and it does not count the bytes pointed to by your pointer. > + > +#ifdef CONFIG_ASSERT > + assert(kmer >= 0); You can check that your pointer is not NULL with kmer != NULL. It is invalid to compare a pointer (char * kmer) with an integer (0). > + assert(samples_vector >= 0); > +#endif > + kmer[strlen(kmer)+1] = '\0'; > + samples_vector[strlen(samples_vector)+1] = '\0'; > + > + // TODEL : > + cout << "DEBUG push_kmers_samples : " << kmer << endl; > + > + string kmerS(kmer); > + string samples_vectorS(samples_vector); > + printLocalKmersMatrix(kmerS, samples_vectorS, false); > + > + Message response; > + response.setTag(PUSH_KMERS_SAMPLES_OK); change to PUSH_KMER_SAMPLES_OK > + send(source, response); > + > } > + else if(tag == PUSH_KMERS_SAMPLES_END) { > + > + char * kmer; char kmer[255]; char * kmer = malloc(255*sizeof(char)); char * kmer = new char[255]; // not sure of the syntax To create a Kmer from a char* code/Mock/common_functions.h Kmer wordId(const char*a); To transfer a Kmer in a network buffer, use load/dump (interface CarriageableItem, Kmer implements this !) 
int load(const char * buffer); int dump(char * buffer) const; int getRequiredNumberOfBytes() const; To dump a vector<bool> into a network buffer: vector<bool> kmerSamples; kmerSamples.resize(numberOfSamples); for(int i = 0 ; i < (int) kmerSamples.size() ; ++i) kmerSamples[i] = false; // fetch samples from VirtualColor // bla bla bla char buffer[4000]; int bytes = 0; bytes += kmerObject.dump(buffer); for(vector<bool>::iterator myIterator = kmerSamples.begin() ; myIterator != kmerSamples.end() ; ++myIterator) { buffer[bytes] = *myIterator; bytes++; } Message message; message.setNumberOfBytes(bytes); message.setBuffer(buffer); send(HENRY, message); > + char * samples_vector; > + // vector<bool> samples_vector; > + > + int offset = 0; > + > + memcpy(&kmer, buffer + offset, sizeof(kmer)); > + offset += sizeof(kmer); > + memcpy(&samples_vector, buffer + offset, > sizeof(samples_vector)); > + offset += sizeof(samples_vector); > + > +#ifdef CONFIG_ASSERT > + assert(kmer >= 0); Check that it is not NULL with kmer != NULL > + assert(samples_vector >= 0); > +#endif > + kmer[strlen(kmer)+1] = '\0'; > + samples_vector[strlen(samples_vector)+1] = '\0'; > + // TODEL : > + cout << "DEBUG push_kmers_samples END : " << kmer << endl; > + > + string kmerS(kmer); > + string samples_vectorS(samples_vector); > + printLocalKmersMatrix(kmerS, samples_vectorS, false); > + > + Message response; > + response.setTag(PUSH_KMERS_SAMPLES_OK); > + send(source, response); > + > + // tell Mother that the matrix is ready now. > + > + if(matricesIsReady){ > + Message coolMessage; > + coolMessage.setTag(MATRIX_IS_READY); > + send(m_mother, coolMessage); > + } You should probably create two types of Actor (KmerFileOwner and StoreKeeperIterator, or something like this) because otherwise you have to support both personalities. 
> + matricesIsReady = true; > + > + } > } > > > @@ -275,3 +351,29 @@ void > MatrixOwner::printLocalGramMatrixWithHash(ostream & stream, map<SampleIdent > stream << endl; > } > } > + > + > + > +void MatrixOwner::printLocalKmersMatrix(string & kmer, string & > samples_kmers, bool force) { > + > + m_kmersMatrix << kmer; > + for(std::string::iterator sampleKmerBool = > samples_kmers.begin(); sampleKmerBool != samples_kmers.end(); > ++sampleKmerBool) { > + // do_things_with(*sampleKmerBool); > + m_kmersMatrix << "\t" << *sampleKmerBool; > + // TODEL : > + cout << "\t" << *sampleKmerBool; > + } > + m_kmersMatrix << endl; > + > + > flushFileOperationBuffer(force,&m_kmersMatrix,&m_kmersMatrixFile, 4096); use CONFIG_FILE_IO_BUFFER_SIZE instead of 4096. > +} > + > + > +void MatrixOwner::createKmersMatrixOutputFile() { kmer matrix, not kmers matrix. > + > + ostringstream kmersMatrix; > + kmersMatrix << m_parameters->getPrefix() << "/Surveyor/"; > + kmersMatrix << "KmersMatrix.tsv"; > + m_kmersMatrixFile.open(kmersMatrix.str().c_str()); > + // similarityFile.close(); > +} > diff --git a/code/Surveyor/MatrixOwner.h b/code/Surveyor/MatrixOwner.h > index ceb17e2..ef0cc5f 100644 > --- a/code/Surveyor/MatrixOwner.h > +++ b/code/Surveyor/MatrixOwner.h > @@ -28,6 +28,7 @@ > > #include <map> > #include <iostream> > +#include <sstream> > using namespace std; > > class MatrixOwner : public Actor { > @@ -49,6 +50,15 @@ private: > > void computeDistanceMatrix(); > > + ostringstream m_kmersMatrix; > + ofstream m_kmersMatrixFile; > + > + void printLocalKmersMatrix(string & kmer, string & > samples_kmers, bool force); > + void createKmersMatrixOutputFile(); > + > + > + bool matricesIsReady; > + > public: > > MatrixOwner(); > @@ -62,6 +72,9 @@ public: > PUSH_PAYLOAD, > PUSH_PAYLOAD_OK, > PUSH_PAYLOAD_END, > + PUSH_KMERS_SAMPLES, > + PUSH_KMERS_SAMPLES_OK, > + PUSH_KMERS_SAMPLES_END, > MATRIX_IS_READY, > LAST_TAG > }; > diff --git a/code/Surveyor/Mother.cpp b/code/Surveyor/Mother.cpp > index 
4d2ef9c..8fe0789 100644 > --- a/code/Surveyor/Mother.cpp > +++ b/code/Surveyor/Mother.cpp > @@ -410,6 +410,9 @@ void Mother::startSurveyor() { > > bool isRoot = (getName() % getSize()) == 0; > > + //TODEL > + // m_kmersMatrixBlocNumber = 0; > + > //cout << "DEBUG startSurveyor isRoot" << isRoot << endl; > > // get a list of files. > @@ -468,6 +471,13 @@ void Mother::startSurveyor() { > > m_storeKeepers.push_back(actor->getName()); > > + //TODEL > + // set the vector of samples into the storekeeper, and > path to write > + // actor->setSamplesVector(&m_sampleNames); > + actor->setOutputKmersMatrixPath(m_parameters->getPrefix()); > + // > actor->setKmersMatrixBlocNumber(m_kmersMatrixBlocNumber); > + // ++m_kmersMatrixBlocNumber; > + > // tell the CoalescenceManager about the local StoreKeeper > Message dummyMessage; > int localStore = actor->getName(); > diff --git a/code/Surveyor/Mother.h b/code/Surveyor/Mother.h > index 092920f..207127b 100644 > --- a/code/Surveyor/Mother.h > +++ b/code/Surveyor/Mother.h > @@ -28,6 +28,7 @@ > > #include <vector> > #include <string> > +#include <iostream> > using namespace std; > > /** > @@ -93,6 +94,11 @@ private: > */ > void sendToFirstMother(int forwardTag, int responseTag); > > + /* int m_kmersMatrixBlocNumber; */ > + void printLocalKmersMatrix(string & kmer, string & > samples_kmers, bool force); > + void createKmersMatrixOutputFile(); > + > + > public: > > Mother(); > diff --git a/code/Surveyor/StoreKeeper.cpp b/code/Surveyor/StoreKeeper.cpp > index 84eef34..0dd84e3 100644 > --- a/code/Surveyor/StoreKeeper.cpp > +++ b/code/Surveyor/StoreKeeper.cpp > @@ -24,8 +24,13 @@ > #include "MatrixOwner.h" > > #include <code/VerticesExtractor/Vertex.h> > +#include <RayPlatform/structures/MyHashTableIterator.h> > +#include <RayPlatform/core/OperatingSystem.h> > > -#include <iostream> > +#include <iostream> > +#include <sstream> > +#include <iomanip> > +#include <fstream> > using namespace std; > > #include <string.h> > @@ -92,6 +97,12 @@ 
void StoreKeeper::receive(Message & message) { > cout << " with " << m_receivedPushes << " push operations" << endl; > computeLocalGramMatrix(); > > + > + // TODEL Print matrix bloc > + // m_kmersMatrixBlocNumber = 0; block > + // printLocalKmersMatrix(); > + > + > m_mother = source; > > memcpy(&m_matrixOwner, buffer, sizeof(m_matrixOwner)); > @@ -114,13 +125,19 @@ void StoreKeeper::receive(Message & message) { > printLocalGramMatrix(); > */ > > + m_hashTableIterator.constructor(&m_hashTable); > + > sendMatrixCell(); > > - } else if(tag == MatrixOwner::PUSH_PAYLOAD_OK) { > + sendKmersSamples(); > > + } else if(tag == MatrixOwner::PUSH_PAYLOAD_OK) { > sendMatrixCell(); > > - } else if(tag == CoalescenceManager::SET_KMER_LENGTH) { > + } else if(tag == MatrixOwner::PUSH_KMERS_SAMPLES_OK) { > + sendKmersSamples(); > + } > + else if(tag == CoalescenceManager::SET_KMER_LENGTH) { > > int kmerLength = 0; > int position = 0; > @@ -236,6 +253,8 @@ void StoreKeeper::configureHashTable() { > ); > > m_configured = true; > + > + // m_hashTableIterator.constructor(&m_hashTable); > } > > void StoreKeeper::printColorReport() { > @@ -375,6 +394,7 @@ void StoreKeeper::computeLocalGramMatrix() { > //printLocalGramMatrix(); > } > > + > void StoreKeeper::printLocalGramMatrix() { > > printName(); > @@ -623,3 +643,123 @@ void StoreKeeper::storeData(Vertex & vertex, int & > sample) { > > */ > } > + > + > +// void StoreKeeper::setSamplesVector(vector<string> * samplesId) { > +// for (std::vector<bool>::iterator it = samplesVector.begin() ; > +// it != samplesVector.end(); ++it) { > +// m_currentSamplesKmers << *it << "\t"; > +// } > +// m_currentSamplesKmers = samplesId; > +// } > + > +void StoreKeeper::setOutputKmersMatrixPath(string pathPrefix) { > + // m_outputKmersMatrixPath = pathPrefix; > + // m_outputKmersMatrixPath += "/KmersMatrixDump/"; > + // createDirectory(m_outputKmersMatrixPath.c_str()); > +} > + > + > +// void StoreKeeper::setKmersMatrixBlocNumber(int blocNb) { > + > +// // 
m_kmersMatrixBlocNumber = blocNb; > +// } > + > +void StoreKeeper::sendKmersSamples() { > + > + cout << "sendKmersSamples_traces"<< endl; > + > + string kmerString; > + string samplesKmers; > + > + printLocalKmersMatrix(kmerString, samplesKmers); > + > + cout << "DEBUG sendKmersSamples :" << kmerString << > samplesKmers << endl; > + > + > + Message message; > + char buffer[4096]; > + int offset = 0; > + > + memcpy(buffer + offset, kmerString.c_str(), kmerString.length()); > + offset += kmerString.length(); > + memcpy(buffer + offset, samplesKmers.c_str(), > samplesKmers.length()); > + offset += samplesKmers.length(); > + > + message.setBuffer(buffer); > + message.setNumberOfBytes(offset); > + > + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES); > + if(m_hashTableIterator.hasNext()){ > + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES); > + }else{ > + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES_END); > + } > + > + send(m_matrixOwner, message); > + > +} > + > + > +void StoreKeeper::printLocalKmersMatrix(string & kmerString, string & > samplesKmers) { > + > + ExperimentVertex * currentVertex; > + VirtualKmerColorHandle currentVirtualColor; > + > + vector<bool> samplesVector (m_currentSamplesKmers.tellp(), false); > + > + // ofstream kmersMatrixOutFile; > + // stringstream matrixOutFileName; > + > + // m_currentKmer.clear(); > + // m_currentSamplesKmers.clear(); > + > + cout << "YOYOYO "<< m_hashTableIterator.hasNext() << endl; > + // matrixOutFileName << m_outputKmersMatrixPath; > + // matrixOutFileName << "kmatrix_bloc-"; > + // matrixOutFileName << setw(3) << setfill('0') << > m_kmersMatrixBlocNumber; > + // matrixOutFileName << ".tsv"; > + > + // > kmersMatrixOutFile.open(matrixOutFileName.str().c_str(),ios::app); > + > + if(m_hashTableIterator.hasNext()){ > + > + fill(samplesVector.begin(),samplesVector.end(),false); > + currentVertex = m_hashTableIterator.next(); > + Kmer kmer = currentVertex->getKey(); > + > + // cout << "DEBUG vertex :" << > 
kmer.idToWord(m_kmerLength, m_colorSpaceMode) << " color: "; > + // kmersMatrixOutFile << kmer.idToWord(m_kmerLength, > m_colorSpaceMode) << "\t"; > + // m_currentKmer << kmer.idToWord(m_kmerLength, > m_colorSpaceMode) << "\t"; > + kmerString = kmer.idToWord(m_kmerLength, m_colorSpaceMode); > + > + currentVirtualColor = currentVertex->getVirtualColor(); > + set<PhysicalKmerColor> * samples = > m_colorSet.getPhysicalColors(currentVirtualColor); > + > + for(set<PhysicalKmerColor>:: iterator sampleIterator = > samples->begin(); > + sampleIterator != samples->end(); ++sampleIterator) { > + PhysicalKmerColor value = *sampleIterator; > + samplesVector[value] = true; > + // cout << " " << value; > + } > + > + for (std::vector<bool>::iterator it = > samplesVector.begin() ; > + it != samplesVector.end(); ++it) { > + // m_currentSamplesKmers << *it << "\t"; > + samplesKmers += '\t'; > + samplesKmers += *it; > + } > + > + // cout << endl; > + // samplesKmers += '\n'; > + // m_currentSamplesKmers << '\n'; > + // kmersMatrixOutFile << endl; > + } > + > + cout << "DEBUG printLocalKmers " << kmerString << samplesKmers > << endl; > + > + // kmerString = m_currentKmer.str(); > + // samplesKmers = m_currentSamplesKmers.str(); > + // kmersMatrixOutFile.close(); > + // m_kmersMatrixBlocNumber++; > +} > diff --git a/code/Surveyor/StoreKeeper.h b/code/Surveyor/StoreKeeper.h > index e44cf98..94ced7a 100644 > --- a/code/Surveyor/StoreKeeper.h > +++ b/code/Surveyor/StoreKeeper.h > @@ -34,6 +34,10 @@ > > #include <RayPlatform/actors/Actor.h> > #include <RayPlatform/structures/MyHashTable.h> > +#include <RayPlatform/structures/MyHashTableIterator.h> > + > +#include <iostream> > +#include <sstream> > > /** > * Provides genomic storage. 
> @@ -64,6 +68,8 @@ private: > */ > MyHashTable<Kmer,ExperimentVertex> m_hashTable; > > + MyHashTableIterator<Kmer,ExperimentVertex> m_hashTableIterator; > + > int m_kmerLength; > bool m_colorSpaceMode; > > @@ -79,6 +85,13 @@ private: > void printLocalGramMatrix(); > void printColorReport(); > > + ostringstream m_currentKmer; > + ostringstream m_currentSamplesKmers; > + string m_outputKmersMatrixPath; > + void printLocalKmersMatrix(string & m_kmer, string & > m_samplesKmers); > + > + void sendKmersSamples(); > + > void sendMatrixCell(); > > public: > @@ -86,6 +99,8 @@ public: > StoreKeeper(); > ~StoreKeeper(); > > + void setOutputKmersMatrixPath(string pathPrefix); > + > void receive(Message & message); > > enum { > |