Here is my Makefile to run BALSA, but I'm blocked at running the alignment by a CUDA MALLOC error, so the Makefile is not completely debugged.
# Makefile for Balsa Cuda analysis of mm10 sequences # FQLINES in each split fastq file, multiple of four, for pair-multi FQLINES=20000000 BPATH=~/Downloads/balsax/balsa/balsa_cuda_6.5_GTX9x0 REF=reference/mm10.fa # $REFI is a just prefix and no file of that name should exist. REFI=$(REF).index REFILIST=$(REFI).amb $(REFI).ann $(REFI).bwt $(REFI).fmv \ $(REFI).lkt $(REFI).pac $(REFI).rev.bwt $(REFI).rev.fmv \ $(REFI).rev.lkt $(REFI).rev.pac $(REFI).sa $(REFI).tra REFIGPULIST=$(REFI).fmv.mmap $(REFI).rev.fmv.mmap $(REFI).pac.mmap VCFPATH=indels-snps IVCF=mgp.v4.indels.dbSNP.vcf SVCF=mgp.v4.snps.dbSNP.vcf FASTQD=fastq # fastq file names without the .fastq extension # (R1_001, not R1_001.fastq) R1=index39_CTATAC_L001-L002_R1_001 R2=index39_CTATAC_L001-L002_R2_001 SOAPPATH=~/Downloads/soap3-dp-5.5 # Obtain the gene region list manually from UCSC browser - whole gene, known genes BED file GRL=mm10-gene-region-list # Should not have to change anything below this point. soap3-dp-builder.ini : $(SOAPPATH)/soap3-dp-builder.ini ln -sf $(SOAPPATH)/soap3-dp-builder.ini . #Build the 2BWT index. $(REFILIST) : soap3-dp-builder.ini $(REF) time $(SOAPPATH)/soap3-dp-builder $(REF) #Convert the 2BWT index to the GPU-2BWT index, just a few seconds $(REFGPULIST) : $(REFILIST) time $(SOAPPATH)/BGS-Build $(REFI) # Compute SnpDB index, # $(BPATH)/SnpDBBuilder $(REFI) <VCF> <Output Prefix> mm10.snpDB.1list mm10.snpDB.bv snpdbtest : $(REFILIST) $(VCFPATH)/$(SVCF) time $(BPATH)/SnpDBBuilder $(REFI) $(VCFPATH)/$(SVCF) mm10.snpDB # Compute IndelDB index, about 20 sec for mm10 # IndelDBBuilder <2BWT Index> <VCF> <Output file name> mm10.idelDB : $(REFILIST) $(VCFPATH)/$(IVCF) time $(BPATH)/IndelDBBuilder $(REFI) $(VCFPATH)/$(IVCF) mm10 # Download $SVCF.gz from ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v4.snps.dbSNP.vcf.gz $(VCFPATH)/$(SVCF) svcf : wget ftp://ftp-mouse.sanger.ac.uk/current_snps/$(SVCF).gz gunzip $(SVCF).gz mv $(SVCF) $(VCFPATH) # Download $IVCF.gz from ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v4.indels.dbSNP.vcf.gz $(VCFPATH)/$(IVCF) ivcf : wget ftp://ftp-mouse.sanger.ac.uk/current_snps/$(IVCF).gz gunzip $(IVCF).gz mv $(IVCF) $(VCFPATH) # Build region index from the list file obtained with the UCSC genome browser # with "known genes" BED file. Takes just a few seconds # RegionIndexBuilder <2BWT Index> <Input region list file> <Output List File> <Input File Format> $(GRL).index : $(GRL).bed time $(BPATH)/RegionIndexBuilder $(REFI) $(GRL).bed $(GRL).index -bed #Align the unpaired reads to the genome # make sam output (-b 2) bam (-b 3) # balsa pair <2BWT Index> <dbSnp Index> <Indel DB> <General Region List> <First FASTQ File> <Second FASTQ FILE> <result Prefix> [options] # balsa pair-multi <2BWT Index> <dbSnp Index> <Indel DB> <General Region List> <Read File Pairs List> <result Prefix> [options] reads_1.fq.out: soap3-dp.ini pairs.list time $(BPATH)/balsa pair-multi $(REFI) mm10.snpDB mm10.indelDB mm10-gene-region-list.index pairs.list result -b 3 pairs.list : $(FASTQD)/$(R1).fastq $(FASTQD)/$(R2).fastq cd $(FASTQD); split -l $(FQLINES) $(R1).fastq $(R1).split cd $(FASTQD); split -l $(FQLINES) $(R2).fastq $(R2).split ls $(FASTQD)/$(R1).split* > $(FASTQD)/r1.list ls $(FASTQD)/$(R2).split* > $(FASTQD)/r2.list paste $(FASTQD)/r1.list $(FASTQD)/r2.list > $(FASTQD)/files.list perl -pe 's@\n@\t400\t600\tresult\n@' $(FASTQD)/files.list > pairs.list clean.pairs.list : rm -f $(FASTQD)/*split* $(FASTQD)/files.list $(FASTQD)/r?.list rm -f pairs.list
Log in to post a comment.
Here is my Makefile to run BALSA, but I'm blocked at running the
alignment by a CUDA MALLOC error, so the Makefile is not completely debugged.
Last edit: robertthebob2 2015-01-26