Created
July 26, 2019 10:34
-
-
Save tkrahn/28e15c52a74315975d2ed98e74dee9b4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
clear | |
YSEQ_ID=${PWD##*/} | |
# Exract umapped reads from the BAM file | |
samtools view -b ${YSEQ_ID}_bwa-mem_hg19_sorted.bam > ${YSEQ_ID}_unmapped.bam '*' | |
bedtools bamtofastq -i ${YSEQ_ID}_unmapped.bam -fq ${READS_1} -fq2 ${READS_2} | |
# Check if there are any bacteria? | |
RDP_CLASSIFIER="/home/thomas/src/16S/RDPTools/classifier.jar" | |
java -Xmx1g -jar ${RDP_CLASSIFIER} classify -o ${YSEQ_ID}_classifier.txt -h ${YSEQ_ID}_hierarchical.txt ${READS_1} | |
# Align unmapped reads to Streptococcus oralis (the most common oral bacterium) | |
REF="/genomes/0/refseq/bacteria_genomes/Streptococcus_oralis.fa" | |
bwa mem -t 32 -M ${REF} ${READS_1} ${READS_2} | \ | |
samtools view -b -F 4 -t ${REF} -o ${YSEQ_ID}_Streptococcus_oralis.bam - | |
samtools sort ${YSEQ_ID}_Streptococcus_oralis.bam -o ${YSEQ_ID}_Streptococcus_oralis_sorted.bam | |
samtools index ${YSEQ_ID}_Streptococcus_oralis_sorted.bam | |
samtools mpileup -r CP019562.1 -u -C 50 -v -f ${REF} ${YSEQ_ID}_Streptococcus_oralis_sorted.bam | \ | |
bcftools call -O z -v -m -P 0 > ${YSEQ_ID}_Streptococcus_oralis.vcf.gz | |
tabix ${YSEQ_ID}_Streptococcus_oralis.vcf.gz | |
samtools faidx ${REF} CP019562.1 | \ | |
bcftools consensus ${YSEQ_ID}_Streptococcus_oralis.vcf.gz -o ${YSEQ_ID}_Streptococcus_oralis.fasta | |
samtools idxstats ${YSEQ_ID}_Streptococcus_oralis_sorted.bam | |
# Clean up no longer needed files | |
rm -f ${YSEQ_ID}_unmapped.bam | |
rm -f ${READS_1} ${READS_2} | |
rm -f ${YSEQ_ID}_classifier.txt | |
rm -f ${YSEQ_ID}_Streptococcus_oralis.bam |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment