From: <bpe...@us...> - 2016-01-16 07:13:38
|
Revision: 4997 http://sourceforge.net/p/simupop/code/4997 Author: bpeng2000 Date: 2016-01-16 07:13:35 +0000 (Sat, 16 Jan 2016) Log Message: ----------- Initial support for migration with backward migration matrix Modified Paths: -------------- trunk/src/__init__.py trunk/src/migrator.cpp trunk/src/migrator.h Modified: trunk/src/__init__.py =================================================================== --- trunk/src/__init__.py 2015-11-01 03:09:25 UTC (rev 4996) +++ trunk/src/__init__.py 2016-01-16 07:13:35 UTC (rev 4997) @@ -166,6 +166,7 @@ 'InfoExec', # 'Migrator', + 'BackwardMigrator', 'MergeSubPops', 'SplitSubPops', 'ResizeSubPops', @@ -245,6 +246,7 @@ 'tagID', # 'migrate', + 'backwardMigrate', 'resizeSubPops', 'splitSubPops', 'mergeSubPops', @@ -1132,6 +1134,10 @@ 'Function form of operator ``Migrator``.' Migrator(*args, **kwargs).apply(pop) +def backwardMigrate(pop, *args, **kwargs): + 'Function form of operator ``BackwardMigrator``.' + BackwardMigrator(*args, **kwargs).apply(pop) + def splitSubPops(pop, *args, **kwargs): '''Split subpopulations (*subPops*) of population *pop* according to either *sizes* or *proportions* of the resulting subpopulations, or an information Modified: trunk/src/migrator.cpp =================================================================== --- trunk/src/migrator.cpp 2015-11-01 03:09:25 UTC (rev 4996) +++ trunk/src/migrator.cpp 2016-01-16 07:13:35 UTC (rev 4997) @@ -26,6 +26,9 @@ #include "virtualSubPop.h" #include "migrator.h" +#include <boost/numeric/ublas/lu.hpp> +#include <boost/numeric/ublas/io.hpp> + namespace simuPOP { Migrator::Migrator(const floatMatrix & rate, int mode, const uintList & toSubPops, @@ -145,7 +148,6 @@ } } - for (size_t from = 0, fromEnd = fromSubPops.size(); from < fromEnd; ++from) { size_t spFrom = fromSubPops[from].subPop(); // rateSize might be toSize + 1, the last one is from->from @@ -264,6 +266,243 @@ } + +BackwardMigrator::BackwardMigrator(const floatMatrix & rate, int mode, + int begin, int end, int step, const intList & at, + const intList & reps, const subPopList & subPops, const stringList & infoFields) + : BaseOperator("", begin, end, step, at, reps, subPops, infoFields), + m_rate(rate.elems()), m_inverse_rate(rate.elems().size(), rate.elems().size()), m_mode(mode), m_symmetric_matrix(true) +{ + DBG_FAILIF(!subPops.empty() && subPops.size() != m_rate.size(), + ValueError, "Length of param subPop must match rows of rate matrix."); + DBG_FAILIF(m_mode != BY_PROBABILITY && m_mode != BY_PROPORTION, + ValueError, "Only BY_PROBABILITY and BY_PROPORTION is supported by BackMigrator"); + + // find the inverse of B' + size_t sz = m_rate.size(); + boost::numeric::ublas::matrix<double> Bt(sz, sz); + for (size_t i = 0; i < sz; ++i) { + if (m_rate[i].size() != sz) + throw ValueError("A m by m matrix is required for backward migration matrix."); + // + for (size_t j = 0; j < sz; ++j) { + if (i == j) { + Bt(i, i) = accumulate(m_rate[i].begin(), m_rate[i].end(), 0.0) - m_rate[i][i]; + Bt(i, i) = 1. - Bt(i, i); + } else + Bt(i, j) = m_rate[j][i]; + if (fcmp_lt(Bt(i, j), 0.)) + throw ValueError((boost::format("Backward migration rate should be positive. %d provided.") % Bt(i,j)).str()); + if (fcmp_gt(Bt(i, j), 1.)) + throw ValueError((boost::format("Backward migration rate should be in the range of [0,1], %d provided.") % Bt(i,j)).str()); + if (j > i && m_rate[i][j] != m_rate[j][i]) + m_symmetric_matrix = false; + } + } + // inverse + DBG_DO(DBG_MIGRATOR, cerr << "Transpose of backward migration matrix B is " << Bt << endl); + boost::numeric::ublas::permutation_matrix<std::size_t> pm(sz); + int res = lu_factorize(Bt, pm); + if (res != 0) + throw RuntimeError("Failed to convert backward matrix to forward migration matrix. (Matrix is not inversable)."); + m_inverse_rate.assign(boost::numeric::ublas::identity_matrix<double>(sz)); + // backsubstite to get the inverse + lu_substitute(Bt, pm, m_inverse_rate); + + DBG_DO(DBG_MIGRATOR, cerr << "Inverse of B' is " << m_inverse_rate << endl); +} + + +string BackwardMigrator::describe(bool /* format */) const +{ + return "<simuPOP.BackwardMigrator>"; +} + + +bool BackwardMigrator::apply(Population & pop) const +{ + // set info of individual + size_t info = pop.infoIdx(infoField(0)); + + subPopList VSPs = applicableSubPops(pop); + if (VSPs.size() <= 1) + return true; + + DBG_FAILIF(VSPs.size() != m_rate.size(), + ValueError, "Number of 'from' subpopulations should match number of rows of migration rate matrix."); + + vectoru subPops; + for (size_t i = 0; i < VSPs.size(); ++i) { + DBG_FAILIF(VSPs[i].isVirtual(), ValueError, + "BackwardMigrator does not support virtual subpupulations.") + DBG_FAILIF(m_rate[i].size() != VSPs.size(), ValueError, + "A square matrix is required for BackwardMigrator") + subPops.push_back(VSPs[i].subPop()); + } + + // assign individuals their own subpopulation ID + for (size_t sp = 0; sp < pop.numSubPop(); ++sp) { + RawIndIterator it = pop.rawIndBegin(sp); + RawIndIterator it_end = pop.rawIndEnd(sp); + if (numThreads() > 1) { +#ifdef _OPENMP + size_t popSize = it_end - it; +# pragma omp parallel firstprivate(it, it_end) + { + size_t id = omp_get_thread_num(); + it = it + id * (popSize / numThreads()); + it_end = id == numThreads() - 1 ? it_end : it + popSize / numThreads(); + for (; it != it_end; ++it) + it->setInfo(static_cast<double>(sp), info); + } +#endif + } else { + for (; it != it_end; ++it) + it->setInfo(static_cast<double>(sp), info); + } + } + + DBG_FAILIF(pop.hasActivatedVirtualSubPop(), RuntimeError, + "Migration can not be applied to activated virtual subpopulations"); + + // subpopulation size S (before migration) + vectoru S; + for (size_t i = 0; i < subPops.size(); ++i) + S.push_back(pop.subPopSize(subPops[i])); + + // now, we need to calculate a forward migration matrix from the backward one + // the formula is + + // S' = B'^-1 * S + // F = diag(S)^-1 B' diag(S') + // + // but for the special case of equal population size and symmetric matrix, + // two matrices are the same + bool simple_case = m_symmetric_matrix; + // symmtrix matrix, check if equal population size + if (simple_case) { + for (size_t i = 1; i < subPops.size(); ++i) + if (S[i] != S[i-1]) + simple_case = false; + } + + size_t sz = m_rate.size(); + // if not the simple case, we need to calculate rate... + matrixf migrationRate; + if (simple_case) + migrationRate = m_rate; + else { + // with Bt^-1, we can calculate expected population size + vectorf Sp(sz); + for (size_t i = 0; i < sz; ++i) { + Sp[i] = 0; + for (size_t j = 0; j < sz; ++j) + Sp[i] += m_inverse_rate(i, j) * S[j]; + } + DBG_DO(DBG_MIGRATOR, cerr << "Expected next population size is " << Sp << endl); + for (size_t i = 0; i < sz; ++i) { + if (Sp[i] <= 0) + throw RuntimeError((boost::format("Failed to calculate forward migration matrix: negative expected population size %f for subpopulation %d") + % Sp[i] % i).str()); + } + // now, F = diag(S)^-1 * BT * diag (S') + migrationRate = m_rate; + for (size_t i = 0; i < sz; ++i) + for (size_t j = 0; j < sz; ++j) + // m_rate[i][i] might not be defined. + migrationRate[i][j] = m_rate[j][i] * Sp[j] / S[i]; + } + + // check parameters + for (size_t i = 0; i < sz; ++i) { + for (size_t j = 0; j < sz; ++j) { + if (fcmp_lt(migrationRate[i][j], 0.)) + throw ValueError("Converted forward migration rate should be positive."); + if (fcmp_gt(migrationRate[i][j], 1.)) + throw ValueError("Converted forward migration rate should be in the range of [0,1]"); + } + // look for from=to cell. + double sum = accumulate(migrationRate[i].begin(), migrationRate[i].end(), 0.0); + // + double & self = migrationRate[i][i]; + sum -= self; + if (fcmp_gt(sum, 1.0)) + throw ValueError("Sum of migrate rate from one subPop should <= 1"); + // reset to-my-self probability/proportion + self = 1.0 - sum; + + } + if (! simple_case) { + DBG_DO(DBG_MIGRATOR, cerr << "Forward migration matrix is " << migrationRate << endl); + } + + for (size_t from = 0, fromEnd = subPops.size(); from < fromEnd; ++from) { + size_t spFrom = subPops[from]; + // rateSize might be toSize + 1, the last one is from->from + size_t toIndex; + + size_t spSize = pop.subPopSize(spFrom); + + if (m_mode == BY_PROBABILITY) { + WeightedSampler ws(migrationRate[from]); + + // for each individual, migrate according to migration probability + for (IndIterator ind = pop.indIterator(spFrom); ind.valid(); ++ind) { + //toIndex = getRNG().randIntByFreq( rateSize, &migrationRate[from][0] ) ; + toIndex = ws.draw(); + + DBG_ASSERT(toIndex < migrationRate[from].size(), ValueError, + "Return index out of range."); + + if (toIndex < sz && subPops[toIndex] != spFrom) + ind->setInfo(static_cast<double>(subPops[toIndex]), info); + } + } else { + // 2nd, or 3rd method + // first find out how many people will move to other subPop + // then randomly assign individuals to move + vectoru toNum(sz); + // in case that to sub is not in from sub, the last added + // element is not used. sum of toNum is not spSize. + for (size_t i = 0; i < sz; ++i) + toNum[i] = static_cast<ULONG>(spSize * migrationRate[from][i]); + // create a vector and assign indexes, then random shuffle + // and assign info + vectoru toIndices(spSize); + size_t k = 0; + for (size_t i = 0; i < sz && k < spSize; ++i) + for (size_t j = 0; j < toNum[i] && k < spSize; ++j) + toIndices[k++] = subPops[i]; + + // the rest of individuals will stay in their original subpopulation. + while (k < spSize) + toIndices[k++] = spFrom; + + getRNG().randomShuffle(toIndices.begin(), toIndices.end()); + IndIterator ind = pop.indIterator(spFrom); + // set info + for (size_t i = 0; ind.valid(); ++i, ++ind) + // The previous migration_to value, if set by a previous vsp, will be overridden. + ind->setInfo(static_cast<double>(toIndices[i]), info); + } + } // for all subPop. + + // do migration. + size_t oldNumSubPop = pop.numSubPop(); + pop.setSubPopByIndInfo(infoField(0)); + // try to keep number of subpopulations. + if (pop.numSubPop() < oldNumSubPop) { + vectorf split(oldNumSubPop - pop.numSubPop() + 1, 0); + split[0] = static_cast<double>(pop.subPopSize(pop.numSubPop() - 1)); + pop.splitSubPop(pop.numSubPop() - 1, split); + } + DBG_ASSERT(pop.numSubPop() >= oldNumSubPop, RuntimeError, + "Migrator should not decrease number of subpopulations."); + + return true; +} + + struct compareVSP { int operator()(const vspID & v1, const vspID & v2) Modified: trunk/src/migrator.h =================================================================== --- trunk/src/migrator.h 2015-11-01 03:09:25 UTC (rev 4996) +++ trunk/src/migrator.h 2016-01-16 07:13:35 UTC (rev 4997) @@ -33,6 +33,8 @@ #include <list> using std::list; +#include <boost/numeric/ublas/matrix.hpp> + #include <iostream> using std::cout; using std::endl; @@ -164,6 +166,100 @@ }; +/** This operator migrates individuals between all available or specified + * subpopulations, according to a backward migration matrix. It differs from + * \c Migrator in how migration matrixes are interpreted. Due to the limit + * of this model, this operator does not support migration by information + * field, migration by count (\e mode = \c BY_COUNT), migration from virtual + * subpopulations, migration between different number of subpopulations, + * and the creation of new subpopulation, as operator \c Migrator provides. + * + * In contrast to a forward migration matrix where $m_{ij}$ is considered + * the probability (proportion or count) of individuals migrating from subpopulation + * \c i to \c j, elements in a reverse migration matrix $m_{ij}$ is considered + * the probability (proportion or count) of individuals migrating from subpopulation + * \c j to \c i, namely the probability (proportion or count) of individuals + * originats from subpopulation \c j. + * + * If migration is applied by probability, the row of the migration matrix + * corresponding to a destination subpopulation is intepreted as probabilities to + * orignate from each source subpopulation. Each individual's source + * subpopulation is assigned randomly according to these probabilities. Note + * that the probability of originating from the present subpopulation is + * automatically calculated so the corresponding matrix elements are ignored. + * + * If migration is applied by proportion, the row of the migration matrix + * corresponding to a destination subpopulation is intepreted as proportions + * to originate from each source subpopulation. The number of migrants from each + * source subpopulation is determined before random indidividuals are + * chosen to migrate. + * + * Unlike the forward migration matrix that describes how migration should + * be performed, the backward migration matrix describes the result of + * migration. The underlying forward migration matrix is calculated at + * each generation and is in theory not the same across generations. + * + * This operator calculates the corresponding forward migration matrix + * from backward matrix and current population size. This process is not + * always feasible so an error will raise if no valid ending population + * size or forward migration matrix could be determined. Please refer to + * the simuPOP user's guide for an explanation of the theory behind forward + * and backward migration matrices. + */ +class BackwardMigrator : public BaseOperator +{ +public: + /** Create a BackwardMigrator that moves individuals between \e subPop + * subpopulations randomly according to a backward migration matrix \e rate. + * The size of the matrix should match the number of subpopulations. + * + * Depending on the value of parameter \e mode, elements in the migration + * matrix (\e rate) are interpreted as either the probabilities to originate + * from source subpopulations (\e mode = \c BY_PROBABILITY) or proportions of + * individuals originate from the source (virtual) subpopulations (\e mode + * = \c BY_PROPORTION). Migration by count is not supported by this operator. + * + * Please refer to operator \c BaseOperator for a detailed explanation for + * all parameters. + */ + BackwardMigrator(const floatMatrix & rate = floatMatrix(), int mode = BY_PROBABILITY, + int begin = 0, int end = -1, int step = 1, const intList & at = vectori(), + const intList & reps = intList(), const subPopList & subPops = subPopList(), + const stringList & infoFields = vectorstr(1, "migrate_to")); + + /// destructor + virtual ~BackwardMigrator() + { + }; + + /// HIDDEN Deep copy of a Migrator + virtual BaseOperator * clone() const + { + return new BackwardMigrator(*this); + } + + + /// HIDDEN apply the Migrator to populaiton \e pop. + virtual bool apply(Population & pop) const; + + /// HIDDEN + string describe(bool format = true) const; + +protected: + /// migration rate. its meaning is controled by m_mode + const matrixf m_rate; + + boost::numeric::ublas::matrix<double> m_inverse_rate; + + bool m_symmetric_matrix; + + /// asProbability (1), asProportion (2), + const int m_mode; +}; + + + + /** Split a given list of subpopulations according to either sizes of the * resulting subpopulations, proportion of individuals, or an information * field. The resulting subpopulations will have the same name as the This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |