Created
October 4, 2019 08:01
-
-
Save mbk0asis/ce8f004c2fc625996bc9de82b79fa32c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Vatiants calling using 'bcftools mpileup - htslib' | |
export LC_ALL=C | |
printf "*** SNVcounter\n | |
*** 'samtools, bcftools, and bedtools' must be installed to run this script\n | |
*** Prepare 'reference fasta' and 'SNV list' in BED format\n | |
*** merge consecutive SNVs using\n | |
*** 'sort -k1,1 -k2,2n SNV.bed | mergeBed -c 4 -o collapse'\n | |
*** usage : ./SNVcounter Input.Bam Reference.Fasta mergedSNV.bed\n\n" | |
printf "()()() Processing '$1' \n\n" | |
dir=$(pwd) | |
if [ ! -d "$dir" ]; then | |
mkdir tmp | |
fi | |
cd tmp | |
bcftools mpileup --threads 20 -d 10000000 -Ou -f ../$2 ../$1 | bcftools call --threads -Ov -mv > $1.vcf | |
# to get information on all sites, remove "v" option in bcftools | |
#samtools mpileup -d 10000000 -Buf ../$2 ../$1 | bcftools view -bcg - > $1.raw.bcf | |
# "-B" option to report the base composition at every mapped position | |
grep -v "#" $1.vcf | cut -f 1,2 > $1.id | |
grep -v "#" $1.vcf | cut -f 1,8 |sed 's/;/\t/g' | grep -Po 'DP=\d+' > $1.dp | |
grep -v "#" $1.vcf | cut -f 1,8 |sed 's/;/\t/g' | grep -Po 'DP4=\d+,\d+,\d+,\d+' > $1.dp4 | |
paste $1.id $1.dp $1.dp4 | sed 's/ \|=\|,/\t/g' > $1.paste | |
awk 'OFS="\t"{if($6+$7+$8+$9 >= 100) print $1,$2-1,$2-1,FILENAME,$4,$6+$7,$8+$9}' $1.paste |sed 's/.trimmed.SE.fq.gz.sam.sorted.bam.paste//g' > $1.bed | |
intersectBed -wa -wb -loj -a ../$3 -b $1.bed | cut -f 1,2,3,8,9,10,11 | sort -k1,1 -k2,2n | mergeBed -c 4,5,6,7 -o first,mean,mean,mean > $1.intersect | |
printf "\n()()() Done '$1' !\n\n" | |
cd .. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment