We tested whether MimicrEE2 accurately introduces mutations. Specifically we tested if a) the mutation rate was accurately simulated and b) all sites have an equal chance of mutating.
We generated a population with 5 chromosomes (chr1=2500bp, chr2=1200bp, chr3=700bp, chr4=400bp, chr5=200bp) of size N=10 diploids and set the mutation rate mu=0.0002. In MimicrEE2 mutations are introduced in the gametes, before a new zygote is formed. Next we simulated 250.000 times progeny for this population and counted the mutations occurring in the progeny population (the progeny population also had a N=10). We expect that each site was mutated exactly 1000 times (mu2Nereplicates = 0.0002210*250000).
The following graph shows the counts of mutations observed for each site in our artificial population with N=10 and 5 chromosomes.
This demonstrates that number of mutations is equally distributed among the genomic sites and that the expected number of mutations per site (1000) was accurately reproduced
We tested whether the observed distribution of mutations (see above graphic) significantly deviates from the expected one using a Chi-square test and found that the number of mutations agrees with the expectation (X-squared = 5098.2, df = 4999, p-value = 0.1605).
The following R code was used to simulate a population with N=10 having 5 chromosomes with different size. For this base population we generated 250.000 progeny populations (random mating) and counted the number of mutations occurring in the progeny at each site.
:::Java
package mim2.test.mutationTest;
import mim2.test.recombinationTest.RecombinationWriter;
import mimcore.data.*;
import mimcore.data.BitArray.BitArray;
import mimcore.data.BitArray.BitArrayBuilder;
import mimcore.data.Mutator.IMutator;
import mimcore.data.Mutator.MutatorGenomeWideRate;
import mimcore.data.gpf.fitness.FitnessCalculatorAllEqual;
import mimcore.data.gpf.mating.MatingFunctionRandomMating;
import mimcore.data.gpf.quantitative.GenotypeCalculatorAllEqual;
import mimcore.data.gpf.quantitative.PhenotypeCalculatorAllEqual;
import mimcore.data.haplotypes.HaploidGenome;
import mimcore.data.haplotypes.SNP;
import mimcore.data.haplotypes.SNPCollection;
import mimcore.data.misc.Tuple;
import mimcore.data.recombination.CrossoverGenerator;
import mimcore.data.recombination.RandomAssortmentGenerator;
import mimcore.data.recombination.RecombinationGenerator;
import mimcore.data.recombination.RecombinationWindow;
import mimcore.io.ChromosomeDefinitionReader;
import mimcore.io.recombination.RRRRecFraction;
import mimcore.io.recombination.RRRcMpMb;
import mimcore.misc.MimicreeThreadPool;
import javax.lang.model.type.IntersectionType;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
/**
* Created by robertkofler on 08/11/2017.
*/
public class MutationTester {
public static Population getPopulation()
{
ArrayList<SNP> snps = new ArrayList<SNP>();
// chr1 2500
// chr2 1200
// chr3 700
// chr4 400
// chr5 200
for(int i=0; i<2500; i++) snps.add(new SNP(new GenomicPosition(Chromosome.getChromosome("chr1"),i+1),'A','A','T'));
for(int i=0; i<1200; i++) snps.add(new SNP(new GenomicPosition(Chromosome.getChromosome("chr2"),i+1),'A','A','T'));
for(int i=0; i<700; i++) snps.add(new SNP(new GenomicPosition(Chromosome.getChromosome("chr3"),i+1),'A','A','T'));
for(int i=0; i<400; i++) snps.add(new SNP(new GenomicPosition(Chromosome.getChromosome("chr4"),i+1),'A','A','T'));
for(int i=0; i<200; i++) snps.add(new SNP(new GenomicPosition(Chromosome.getChromosome("chr5"),i+1),'A','A','T'));
SNPCollection sc=new SNPCollection(snps);
ArrayList<Specimen> diploids=new ArrayList<Specimen>();
for(int i=0; i<10; i++)
{
HaploidGenome hap1 =new HaploidGenome(new BitArrayBuilder(5000).getBitArray(),sc);
HaploidGenome hap2 =new HaploidGenome(new BitArrayBuilder(5000).getBitArray(),sc);
DiploidGenome dg=new DiploidGenome(hap1,hap2);
Specimen s=new Specimen(1.0,1.0,1.0, dg);
diploids.add(s);
}
return new Population(diploids);
}
public static void testMutations()
{
MimicreeThreadPool.setThreads(8);
HashMap<GenomicPosition,Integer> stat=new HashMap<GenomicPosition,Integer>();
Population pop=getPopulation();
IMutator mutator=new MutatorGenomeWideRate(0.0002);
RecombinationGenerator recgen=new RecombinationGenerator(new CrossoverGenerator(new ArrayList<RecombinationWindow>()),new ChromosomeDefinitionReader("").getRandomAssortmentGenerator());
for(int i=0;i<250000; i++) {
Population next= pop.getNextGeneration(new GenotypeCalculatorAllEqual(), new PhenotypeCalculatorAllEqual(),
new FitnessCalculatorAllEqual(), new MatingFunctionRandomMating(), recgen, mutator, pop.size());
for(Specimen s: next.getSpecimen())
{
for(SNP snp: s.getGenome().getHaplotypeA().getSNPCollection().getSNPs())
{
GenomicPosition pos=snp.genomicPosition();
if(!stat.containsKey(pos))stat.put(pos,0);
int currentcount=stat.get(pos);
boolean ahasAncestral=s.getGenome().getHaplotypeA().hasAncestral(pos);
boolean bhasAncestral=s.getGenome().getHaplotypeB().hasAncestral(pos);
if(ahasAncestral)currentcount++;
if(bhasAncestral)currentcount++;
stat.put(pos,currentcount);
}
}
}
ArrayList<Tuple<String,Integer>> toprint=new ArrayList<Tuple<String,Integer>>();
for(Map.Entry<GenomicPosition,Integer> me: stat.entrySet())
{
toprint.add(new Tuple<String,Integer>(me.getKey().toString(),me.getValue()));
}
MutationWriter w=new MutationWriter("/Users/robertkofler/dev/mimicree2/theoreyvalidation/mutation/mutations.txt");
w.write(toprint);
}
}
:::R
library(ggplot2)
theme_set(theme_bw())
df<-read.table("/Users/robertkofler/dev/mimicree2/theoreyvalidation/mutation/prep.txt")
names(df)<-c("chr","pos","mutcount")
df$cs<-(df$mutcount-1000)**2/df$mutcount
#df$chr<-as.character(df$chr)
g<-ggplot(df,aes(x=pos,y=mutcount))+geom_point(size=0.5)+facet_grid(.~chr, scales="free_x", space = "free_x")+xlab("Genomic position")+ylim(0,1150)+
scale_x_continuous(breaks=c(0,200,400,700,1000,1500,2000,2500))
# 0.0002 * 20 *250000
pdf("/Users/robertkofler/dev/mimicree2/theoreyvalidation/mutation/mutation-distribution.pdf",width=8,height=4)
plot(g)
dev.off()
#dev.copy2eps(file="/Users/robertkofler/dev/mimicree2/concept-drawings/data/gauss-example.eps")
# Chisquare test
chisq.test(df$mutcount,p=rep(0.0002,5000))