Here is my Makefile to run BALSA, but I'm blocked at running the
alignment by a CUDA MALLOC error, so the Makefile is not completely debugged.
# Makefile for Balsa Cuda analysis of mm10 sequences# FQLINES in each split fastq file, multiple of four, for pair-multiFQLINES=20000000BPATH=~/Downloads/balsax/balsa/balsa_cuda_6.5_GTX9x0
REF=reference/mm10.fa
# $REFI is a just prefix and no file of that name should exist. REFI=$(REF).index
REFILIST=$(REFI).amb $(REFI).ann $(REFI).bwt $(REFI).fmv \$(REFI).lkt $(REFI).pac $(REFI).rev.bwt $(REFI).rev.fmv \$(REFI).rev.lkt$(REFI).rev.pac$(REFI).sa$(REFI).traREFIGPULIST=$(REFI).fmv.mmap $(REFI).rev.fmv.mmap $(REFI).pac.mmap
VCFPATH=indels-snps
IVCF=mgp.v4.indels.dbSNP.vcf
SVCF=mgp.v4.snps.dbSNP.vcf
FASTQD=fastq
# fastq file names without the .fastq extension# (R1_001, not R1_001.fastq)R1=index39_CTATAC_L001-L002_R1_001
R2=index39_CTATAC_L001-L002_R2_001
SOAPPATH=~/Downloads/soap3-dp-5.5
# Obtain the gene region list manually from UCSC browser - whole gene, known genes BED fileGRL=mm10-gene-region-list
# Should not have to change anything below this point.soap3-dp-builder.ini :$(SOAPPATH)/soap3-dp-builder.ini
ln -sf $(SOAPPATH)/soap3-dp-builder.ini .
#Build the 2BWT index.$(REFILIST) :soap3-dp-builder.ini$(REF)time$(SOAPPATH)/soap3-dp-builder $(REF)#Convert the 2BWT index to the GPU-2BWT index, just a few seconds$(REFGPULIST) :$(REFILIST)time$(SOAPPATH)/BGS-Build $(REFI)# Compute SnpDB index, # $(BPATH)/SnpDBBuilder $(REFI) <VCF> <Output Prefix>mm10.snpDB.1list mm10.snpDB.bv snpdbtest :$(REFILIST)$(VCFPATH)/$(SVCF)time$(BPATH)/SnpDBBuilder $(REFI)$(VCFPATH)/$(SVCF) mm10.snpDB
# Compute IndelDB index, about 20 sec for mm10 # IndelDBBuilder <2BWT Index> <VCF> <Output file name>mm10.idelDB :$(REFILIST)$(VCFPATH)/$(IVCF)time$(BPATH)/IndelDBBuilder $(REFI)$(VCFPATH)/$(IVCF) mm10
# Download $SVCF.gz from ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v4.snps.dbSNP.vcf.gz$(VCFPATH)/$(SVCF) svcf :
wget ftp://ftp-mouse.sanger.ac.uk/current_snps/$(SVCF).gz
gunzip $(SVCF).gz
mv $(SVCF)$(VCFPATH)# Download $IVCF.gz from ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v4.indels.dbSNP.vcf.gz$(VCFPATH)/$(IVCF) ivcf :
wget ftp://ftp-mouse.sanger.ac.uk/current_snps/$(IVCF).gz
gunzip $(IVCF).gz
mv $(IVCF)$(VCFPATH)# Build region index from the list file obtained with the UCSC genome browser# with "known genes" BED file. Takes just a few seconds# RegionIndexBuilder <2BWT Index> <Input region list file> <Output List File> <Input File Format>$(GRL).index :$(GRL).bedtime$(BPATH)/RegionIndexBuilder $(REFI)$(GRL).bed $(GRL).index -bed
#Align the unpaired reads to the genome# make sam output (-b 2) bam (-b 3)# balsa pair <2BWT Index> <dbSnp Index> <Indel DB> <General Region List> <First FASTQ File> <Second FASTQ FILE> <result Prefix> [options]# balsa pair-multi <2BWT Index> <dbSnp Index> <Indel DB> <General Region List> <Read File Pairs List> <result Prefix> [options]reads_1.fq.out:soap3-dp.inipairs.listtime$(BPATH)/balsa pair-multi $(REFI) mm10.snpDB mm10.indelDB mm10-gene-region-list.index pairs.list result -b 3pairs.list :$(FASTQD)/$(R1).fastq$(FASTQD)/$(R2).fastqcd$(FASTQD); split -l $(FQLINES)$(R1).fastq $(R1).split
cd$(FASTQD); split -l $(FQLINES)$(R2).fastq $(R2).split
ls $(FASTQD)/$(R1).split* > $(FASTQD)/r1.list
ls $(FASTQD)/$(R2).split* > $(FASTQD)/r2.list
paste $(FASTQD)/r1.list $(FASTQD)/r2.list > $(FASTQD)/files.list
perl -pe 's@\n@\t400\t600\tresult\n@'$(FASTQD)/files.list > pairs.list
clean.pairs.list :
rm -f $(FASTQD)/*split* $(FASTQD)/files.list $(FASTQD)/r?.list
rm -f pairs.list
Last edit: robertthebob2 2015-01-26
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Here is my Makefile to run BALSA, but I'm blocked at running the
alignment by a CUDA MALLOC error, so the Makefile is not completely debugged.
Last edit: robertthebob2 2015-01-26