[Denovoassembler-devel] RE : kmers matrix
Ray -- Parallel genome assemblies for parallel DNA sequencing
Brought to you by:
sebhtml
|
From: Sébastien B. <seb...@ul...> - 2014-02-19 19:13:35
|
Review:
On 19 février 2014 08:41, Maxime Deraspe [ma...@de...] wrote:
> À : Sébastien Boisvert
> Objet : kmers matrix
>
> diff --git a/code/Surveyor/MatrixOwner.cpp b/code/Surveyor/MatrixOwner.cpp
> index ffaae00..13911ef 100644
> --- a/code/Surveyor/MatrixOwner.cpp
> +++ b/code/Surveyor/MatrixOwner.cpp
> @@ -36,6 +36,8 @@ MatrixOwner::MatrixOwner() {
>
> m_receivedPayloads = 0;
>
> + matricesIsReady = false;
"matrices" is plural, so you should use *are* (e.g. matricesAreReady).
> +
> }
>
> MatrixOwner::~MatrixOwner() {
> @@ -65,7 +67,6 @@ void MatrixOwner::receive(Message & message) {
> assert(m_parameters != NULL);
> assert(m_sampleNames != NULL);
> #endif
> -
> m_mother = source;
>
> } else if(tag == PUSH_PAYLOAD) {
> @@ -100,8 +101,8 @@ void MatrixOwner::receive(Message & message) {
> Message response;
> response.setTag(PUSH_PAYLOAD_OK);
> send(source, response);
> -
> - } else if(tag == PUSH_PAYLOAD_END) {
> + }
> + else if(tag == PUSH_PAYLOAD_END) {
>
> m_completedStoreActors++;
>
> @@ -152,9 +153,12 @@ void MatrixOwner::receive(Message & message) {
>
> // tell Mother that the matrix is ready now.
>
> - Message coolMessage;
> - coolMessage.setTag(MATRIX_IS_READY);
> - send(m_mother, coolMessage);
> + if(matricesIsReady){
> + Message coolMessage;
> + coolMessage.setTag(MATRIX_IS_READY);
> + send(m_mother, coolMessage);
> + }
> + matricesIsReady = true;
>
>
> // clear matrices
> @@ -162,7 +166,79 @@ void MatrixOwner::receive(Message & message) {
> m_localGramMatrix.clear();
> m_kernelDistanceMatrix.clear();
> }
> + }
> + else if(tag == PUSH_KMERS_SAMPLES) {
change to PUSH_KMER_SAMPLES
> +
> + char * kmer;
> + char * samples_vector;
use lower camel case style (sampleVector)
Also, this is an uninitialized pointer. Using it as is will lead to random behavior.
> + // vector<bool> samples_vector;
> +
> + int offset = 0;
> +
> + memcpy(&kmer, buffer + offset, sizeof(kmer));
> + offset += sizeof(kmer);
> + memcpy(&samples_vector, buffer + offset,
> sizeof(samples_vector));
You are memcpy'ing into an uninitialized pointer.
> + offset += sizeof(samples_vector);
I think this is always 8 bytes on 64-bit systems, and it does not count the bytes pointed to by your pointer.
> +
> +#ifdef CONFIG_ASSERT
> + assert(kmer >= 0);
You can check that your pointer is not NULL with kmer != NULL
It is invalid to compare a pointer (char * kmer) with an integer (0).
> + assert(samples_vector >= 0);
> +#endif
> + kmer[strlen(kmer)+1] = '\0';
> + samples_vector[strlen(samples_vector)+1] = '\0';
> +
> + // TODEL :
> + cout << "DEBUG push_kmers_samples : " << kmer << endl;
> +
> + string kmerS(kmer);
> + string samples_vectorS(samples_vector);
> + printLocalKmersMatrix(kmerS, samples_vectorS, false);
> +
> + Message response;
> + response.setTag(PUSH_KMERS_SAMPLES_OK);
change to PUSH_KMER_SAMPLES_OK
> + send(source, response);
> +
> }
> + else if(tag == PUSH_KMERS_SAMPLES_END) {
> +
> + char * kmer;
char kmer[255];
char * kmer = malloc(255*sizeof(char));
char * kmer = new char[255]; // not sure of the syntax
To create a Kmer from a char*
code/Mock/common_functions.h
Kmer wordId(const char*a);
To transfer a Kmer in a network buffer, use load/dump (from the CarriageableItem interface, which Kmer implements):
int load(const char * buffer);
int dump(char * buffer) const;
int getRequiredNumberOfBytes() const;
to dump a vector<bool> in a network buffer:
vector<bool> kmerSamples;
kmerSamples.resize(numberOfSamples);
for(int i = 0 ; i < (int) kmerSamples.size() ; ++i)
kmerSamples[i] = false;
// fetch samples from VirtualColor
// bla bla bla
char buffer[4000];
int bytes = 0;
bytes += kmerObject.dump(buffer);
for(vector<bool>::iterator myIterator = kmerSamples.begin() ; myIterator != kmerSamples.end() ; ++myIterator) {
buffer[bytes] = *myIterator;
bytes++;
}
Message message;
message.setNumberOfBytes(bytes);
message.setBuffer(buffer);
send(HENRY, message);
> + char * samples_vector;
> + // vector<bool> samples_vector;
> +
> + int offset = 0;
> +
> + memcpy(&kmer, buffer + offset, sizeof(kmer));
> + offset += sizeof(kmer);
> + memcpy(&samples_vector, buffer + offset,
> sizeof(samples_vector));
> + offset += sizeof(samples_vector);
> +
> +#ifdef CONFIG_ASSERT
> + assert(kmer >= 0);
Check that it is not NULL with this:
kmer != NULL
> + assert(samples_vector >= 0);
> +#endif
> + kmer[strlen(kmer)+1] = '\0';
> + samples_vector[strlen(samples_vector)+1] = '\0';
> + // TODEL :
> + cout << "DEBUG push_kmers_samples END : " << kmer << endl;
> +
> + string kmerS(kmer);
> + string samples_vectorS(samples_vector);
> + printLocalKmersMatrix(kmerS, samples_vectorS, false);
> +
> + Message response;
> + response.setTag(PUSH_KMERS_SAMPLES_OK);
> + send(source, response);
> +
> + // tell Mother that the matrix is ready now.
> +
> + if(matricesIsReady){
> + Message coolMessage;
> + coolMessage.setTag(MATRIX_IS_READY);
> + send(m_mother, coolMessage);
> + }
You should probably create two types of Actor (KmerFileOwner and StoreKeeperIterator, or something like this)
because otherwise you have to support both personalities.
> + matricesIsReady = true;
> +
> + }
> }
>
>
> @@ -275,3 +351,29 @@ void
> MatrixOwner::printLocalGramMatrixWithHash(ostream & stream, map<SampleIdent
> stream << endl;
> }
> }
> +
> +
> +
> +void MatrixOwner::printLocalKmersMatrix(string & kmer, string &
> samples_kmers, bool force) {
> +
> + m_kmersMatrix << kmer;
> + for(std::string::iterator sampleKmerBool =
> samples_kmers.begin(); sampleKmerBool != samples_kmers.end();
> ++sampleKmerBool) {
> + // do_things_with(*sampleKmerBool);
> + m_kmersMatrix << "\t" << *sampleKmerBool;
> + // TODEL :
> + cout << "\t" << *sampleKmerBool;
> + }
> + m_kmersMatrix << endl;
> +
> +
> flushFileOperationBuffer(force,&m_kmersMatrix,&m_kmersMatrixFile, 4096);
use CONFIG_FILE_IO_BUFFER_SIZE instead of 4096.
> +}
> +
> +
> +void MatrixOwner::createKmersMatrixOutputFile() {
kmer matrix, not kmers matrix.
> +
> + ostringstream kmersMatrix;
> + kmersMatrix << m_parameters->getPrefix() << "/Surveyor/";
> + kmersMatrix << "KmersMatrix.tsv";
> + m_kmersMatrixFile.open(kmersMatrix.str().c_str());
> + // similarityFile.close();
> +}
> diff --git a/code/Surveyor/MatrixOwner.h b/code/Surveyor/MatrixOwner.h
> index ceb17e2..ef0cc5f 100644
> --- a/code/Surveyor/MatrixOwner.h
> +++ b/code/Surveyor/MatrixOwner.h
> @@ -28,6 +28,7 @@
>
> #include <map>
> #include <iostream>
> +#include <sstream>
> using namespace std;
>
> class MatrixOwner : public Actor {
> @@ -49,6 +50,15 @@ private:
>
> void computeDistanceMatrix();
>
> + ostringstream m_kmersMatrix;
> + ofstream m_kmersMatrixFile;
> +
> + void printLocalKmersMatrix(string & kmer, string &
> samples_kmers, bool force);
> + void createKmersMatrixOutputFile();
> +
> +
> + bool matricesIsReady;
> +
> public:
>
> MatrixOwner();
> @@ -62,6 +72,9 @@ public:
> PUSH_PAYLOAD,
> PUSH_PAYLOAD_OK,
> PUSH_PAYLOAD_END,
> + PUSH_KMERS_SAMPLES,
> + PUSH_KMERS_SAMPLES_OK,
> + PUSH_KMERS_SAMPLES_END,
> MATRIX_IS_READY,
> LAST_TAG
> };
> diff --git a/code/Surveyor/Mother.cpp b/code/Surveyor/Mother.cpp
> index 4d2ef9c..8fe0789 100644
> --- a/code/Surveyor/Mother.cpp
> +++ b/code/Surveyor/Mother.cpp
> @@ -410,6 +410,9 @@ void Mother::startSurveyor() {
>
> bool isRoot = (getName() % getSize()) == 0;
>
> + //TODEL
> + // m_kmersMatrixBlocNumber = 0;
> +
> //cout << "DEBUG startSurveyor isRoot" << isRoot << endl;
>
> // get a list of files.
> @@ -468,6 +471,13 @@ void Mother::startSurveyor() {
>
> m_storeKeepers.push_back(actor->getName());
>
> + //TODEL
> + // set the vector of samples into the storekeeper, and
> path to write
> + // actor->setSamplesVector(&m_sampleNames);
> + actor->setOutputKmersMatrixPath(m_parameters->getPrefix());
> + //
> actor->setKmersMatrixBlocNumber(m_kmersMatrixBlocNumber);
> + // ++m_kmersMatrixBlocNumber;
> +
> // tell the CoalescenceManager about the local StoreKeeper
> Message dummyMessage;
> int localStore = actor->getName();
> diff --git a/code/Surveyor/Mother.h b/code/Surveyor/Mother.h
> index 092920f..207127b 100644
> --- a/code/Surveyor/Mother.h
> +++ b/code/Surveyor/Mother.h
> @@ -28,6 +28,7 @@
>
> #include <vector>
> #include <string>
> +#include <iostream>
> using namespace std;
>
> /**
> @@ -93,6 +94,11 @@ private:
> */
> void sendToFirstMother(int forwardTag, int responseTag);
>
> + /* int m_kmersMatrixBlocNumber; */
> + void printLocalKmersMatrix(string & kmer, string &
> samples_kmers, bool force);
> + void createKmersMatrixOutputFile();
> +
> +
> public:
>
> Mother();
> diff --git a/code/Surveyor/StoreKeeper.cpp b/code/Surveyor/StoreKeeper.cpp
> index 84eef34..0dd84e3 100644
> --- a/code/Surveyor/StoreKeeper.cpp
> +++ b/code/Surveyor/StoreKeeper.cpp
> @@ -24,8 +24,13 @@
> #include "MatrixOwner.h"
>
> #include <code/VerticesExtractor/Vertex.h>
> +#include <RayPlatform/structures/MyHashTableIterator.h>
> +#include <RayPlatform/core/OperatingSystem.h>
>
> -#include <iostream>
> +#include <iostream>
> +#include <sstream>
> +#include <iomanip>
> +#include <fstream>
> using namespace std;
>
> #include <string.h>
> @@ -92,6 +97,12 @@ void StoreKeeper::receive(Message & message) {
> cout << " with " << m_receivedPushes << " push operations" << endl;
> computeLocalGramMatrix();
>
> +
> + // TODEL Print matrix bloc
> + // m_kmersMatrixBlocNumber = 0;
Spelling: "block", not "bloc".
> + // printLocalKmersMatrix();
> +
> +
> m_mother = source;
>
> memcpy(&m_matrixOwner, buffer, sizeof(m_matrixOwner));
> @@ -114,13 +125,19 @@ void StoreKeeper::receive(Message & message) {
> printLocalGramMatrix();
> */
>
> + m_hashTableIterator.constructor(&m_hashTable);
> +
> sendMatrixCell();
>
> - } else if(tag == MatrixOwner::PUSH_PAYLOAD_OK) {
> + sendKmersSamples();
>
> + } else if(tag == MatrixOwner::PUSH_PAYLOAD_OK) {
> sendMatrixCell();
>
> - } else if(tag == CoalescenceManager::SET_KMER_LENGTH) {
> + } else if(tag == MatrixOwner::PUSH_KMERS_SAMPLES_OK) {
> + sendKmersSamples();
> + }
> + else if(tag == CoalescenceManager::SET_KMER_LENGTH) {
>
> int kmerLength = 0;
> int position = 0;
> @@ -236,6 +253,8 @@ void StoreKeeper::configureHashTable() {
> );
>
> m_configured = true;
> +
> + // m_hashTableIterator.constructor(&m_hashTable);
> }
>
> void StoreKeeper::printColorReport() {
> @@ -375,6 +394,7 @@ void StoreKeeper::computeLocalGramMatrix() {
> //printLocalGramMatrix();
> }
>
> +
> void StoreKeeper::printLocalGramMatrix() {
>
> printName();
> @@ -623,3 +643,123 @@ void StoreKeeper::storeData(Vertex & vertex, int &
> sample) {
>
> */
> }
> +
> +
> +// void StoreKeeper::setSamplesVector(vector<string> * samplesId) {
> +// for (std::vector<bool>::iterator it = samplesVector.begin() ;
> +// it != samplesVector.end(); ++it) {
> +// m_currentSamplesKmers << *it << "\t";
> +// }
> +// m_currentSamplesKmers = samplesId;
> +// }
> +
> +void StoreKeeper::setOutputKmersMatrixPath(string pathPrefix) {
> + // m_outputKmersMatrixPath = pathPrefix;
> + // m_outputKmersMatrixPath += "/KmersMatrixDump/";
> + // createDirectory(m_outputKmersMatrixPath.c_str());
> +}
> +
> +
> +// void StoreKeeper::setKmersMatrixBlocNumber(int blocNb) {
> +
> +// // m_kmersMatrixBlocNumber = blocNb;
> +// }
> +
> +void StoreKeeper::sendKmersSamples() {
> +
> + cout << "sendKmersSamples_traces"<< endl;
> +
> + string kmerString;
> + string samplesKmers;
> +
> + printLocalKmersMatrix(kmerString, samplesKmers);
> +
> + cout << "DEBUG sendKmersSamples :" << kmerString <<
> samplesKmers << endl;
> +
> +
> + Message message;
> + char buffer[4096];
> + int offset = 0;
> +
> + memcpy(buffer + offset, kmerString.c_str(), kmerString.length());
> + offset += kmerString.length();
> + memcpy(buffer + offset, samplesKmers.c_str(),
> samplesKmers.length());
> + offset += samplesKmers.length();
> +
> + message.setBuffer(buffer);
> + message.setNumberOfBytes(offset);
> +
> + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES);
> + if(m_hashTableIterator.hasNext()){
> + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES);
> + }else{
> + message.setTag(MatrixOwner::PUSH_KMERS_SAMPLES_END);
> + }
> +
> + send(m_matrixOwner, message);
> +
> +}
> +
> +
> +void StoreKeeper::printLocalKmersMatrix(string & kmerString, string &
> samplesKmers) {
> +
> + ExperimentVertex * currentVertex;
> + VirtualKmerColorHandle currentVirtualColor;
> +
> + vector<bool> samplesVector (m_currentSamplesKmers.tellp(), false);
> +
> + // ofstream kmersMatrixOutFile;
> + // stringstream matrixOutFileName;
> +
> + // m_currentKmer.clear();
> + // m_currentSamplesKmers.clear();
> +
> + cout << "YOYOYO "<< m_hashTableIterator.hasNext() << endl;
> + // matrixOutFileName << m_outputKmersMatrixPath;
> + // matrixOutFileName << "kmatrix_bloc-";
> + // matrixOutFileName << setw(3) << setfill('0') <<
> m_kmersMatrixBlocNumber;
> + // matrixOutFileName << ".tsv";
> +
> + //
> kmersMatrixOutFile.open(matrixOutFileName.str().c_str(),ios::app);
> +
> + if(m_hashTableIterator.hasNext()){
> +
> + fill(samplesVector.begin(),samplesVector.end(),false);
> + currentVertex = m_hashTableIterator.next();
> + Kmer kmer = currentVertex->getKey();
> +
> + // cout << "DEBUG vertex :" <<
> kmer.idToWord(m_kmerLength, m_colorSpaceMode) << " color: ";
> + // kmersMatrixOutFile << kmer.idToWord(m_kmerLength,
> m_colorSpaceMode) << "\t";
> + // m_currentKmer << kmer.idToWord(m_kmerLength,
> m_colorSpaceMode) << "\t";
> + kmerString = kmer.idToWord(m_kmerLength, m_colorSpaceMode);
> +
> + currentVirtualColor = currentVertex->getVirtualColor();
> + set<PhysicalKmerColor> * samples =
> m_colorSet.getPhysicalColors(currentVirtualColor);
> +
> + for(set<PhysicalKmerColor>:: iterator sampleIterator =
> samples->begin();
> + sampleIterator != samples->end(); ++sampleIterator) {
> + PhysicalKmerColor value = *sampleIterator;
> + samplesVector[value] = true;
> + // cout << " " << value;
> + }
> +
> + for (std::vector<bool>::iterator it =
> samplesVector.begin() ;
> + it != samplesVector.end(); ++it) {
> + // m_currentSamplesKmers << *it << "\t";
> + samplesKmers += '\t';
> + samplesKmers += *it;
> + }
> +
> + // cout << endl;
> + // samplesKmers += '\n';
> + // m_currentSamplesKmers << '\n';
> + // kmersMatrixOutFile << endl;
> + }
> +
> + cout << "DEBUG printLocalKmers " << kmerString << samplesKmers
> << endl;
> +
> + // kmerString = m_currentKmer.str();
> + // samplesKmers = m_currentSamplesKmers.str();
> + // kmersMatrixOutFile.close();
> + // m_kmersMatrixBlocNumber++;
> +}
> diff --git a/code/Surveyor/StoreKeeper.h b/code/Surveyor/StoreKeeper.h
> index e44cf98..94ced7a 100644
> --- a/code/Surveyor/StoreKeeper.h
> +++ b/code/Surveyor/StoreKeeper.h
> @@ -34,6 +34,10 @@
>
> #include <RayPlatform/actors/Actor.h>
> #include <RayPlatform/structures/MyHashTable.h>
> +#include <RayPlatform/structures/MyHashTableIterator.h>
> +
> +#include <iostream>
> +#include <sstream>
>
> /**
> * Provides genomic storage.
> @@ -64,6 +68,8 @@ private:
> */
> MyHashTable<Kmer,ExperimentVertex> m_hashTable;
>
> + MyHashTableIterator<Kmer,ExperimentVertex> m_hashTableIterator;
> +
> int m_kmerLength;
> bool m_colorSpaceMode;
>
> @@ -79,6 +85,13 @@ private:
> void printLocalGramMatrix();
> void printColorReport();
>
> + ostringstream m_currentKmer;
> + ostringstream m_currentSamplesKmers;
> + string m_outputKmersMatrixPath;
> + void printLocalKmersMatrix(string & m_kmer, string &
> m_samplesKmers);
> +
> + void sendKmersSamples();
> +
> void sendMatrixCell();
>
> public:
> @@ -86,6 +99,8 @@ public:
> StoreKeeper();
> ~StoreKeeper();
>
> + void setOutputKmersMatrixPath(string pathPrefix);
> +
> void receive(Message & message);
>
> enum {
>
|