Created
November 1, 2017 10:08
-
-
Save jodyphelan/751a00a239bffed21a0468001b418819 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export THREADS=40 | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR103/003/ERR1035223/ERR1035223_1.fastq.gz | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR103/003/ERR1035223/ERR1035223_2.fastq.gz | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz | |
wget ftp://ftp.ensemblgenomes.org/pub/release-32/bacteria//fasta/bacteria_0_collection/mycobacterium_tuberculosis_h37rv/dna/Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa.gz | |
gunzip Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa.gz | |
mv Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa H37Rv.fa | |
bwa index H37Rv.fa | |
bwa mem -t $THREADS -R '@RG\tID:ERR1664619\tSM:ERR1664619\tPL:Illumina' H37Rv.fa ERR1664619_1.fastq.gz ERR1664619_2.fastq.gz | samtools view -b -@ $THREADS - | samtools sort -@ $THREADS -o ERR1664619.bam - | |
bwa mem -t $THREADS -R '@RG\tID:ERR1035223\tSM:ERR1035223\tPL:Illumina' H37Rv.fa ERR1035223_1.fastq.gz ERR1035223_2.fastq.gz | samtools view -b -@ $THREADS - | samtools sort -@ $THREADS -o ERR1035223.bam - | |
samtools mpileup -ugf H37Rv.fa ERR1664619.bam -t DP | bcftools call --threads $THREADS --ploidy 1 -g 10 -m -O b -o ERR1664619.bcf && bcftools index ERR1664619.bcf | |
samtools mpileup -ugf H37Rv.fa ERR1035223.bam -t DP | bcftools call --threads $THREADS --ploidy 1 -g 10 -m -O b -o ERR1035223.bcf && bcftools index ERR1035223.bcf | |
### Merging from BCF | |
# check output | |
bcftools merge -g H37Rv.fa ERR1664619.bcf ERR1035223.bcf | awk '$2==1' | |
### Merging from VCF | |
bcftools view ERR1664619.bcf -o ERR1664619.vcf.gz -O z && bcftools index ERR1664619.vcf.gz | |
bcftools view ERR1035223.bcf -o ERR1035223.vcf.gz -O z && bcftools index ERR1035223.vcf.gz | |
# check output | |
bcftools merge -g H37Rv.fa ERR1664619.vcf.gz ERR1035223.vcf.gz | awk '$2==1' | |
### Merging from back converted BCF | |
bcftools view ERR1664619.bcf -o ERR1664619.vcf -O v && bcftools index -f ERR1664619.bcf | |
bcftools view ERR1035223.bcf -o ERR1035223.vcf -O v && bcftools index -f ERR1035223.bcf | |
# check output | |
bcftools merge -g H37Rv.fa ERR1664619.vcf.gz ERR1035223.vcf.gz | awk '$2==1' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment