Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active June 21, 2016 06:02
Show Gist options
  • Save mbk0asis/38e6edb9513b4778cd0263195c8501bd to your computer and use it in GitHub Desktop.
Save mbk0asis/38e6edb9513b4778cd0263195c8501bd to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# cov2bedGraph_highCpG
#
# Interactively converts every *.cov file in the current directory (as
# produced by "bismark_methylation_extractor") into a .bedGraph track.
#
# Steps:
#   1. Build sliding windows over the genome described by
#      <genome dir>/genome.fa.fai (bedtools makewindows).
#   2. Per .cov file, keep only windows overlapping at least the requested
#      number of .cov records (CpGs).
#   3. Per kept window, sum two columns of the overlapping records and emit
#      100 * a / (a + b) as the window score.
#   4. Prepend a bedGraph "track" header and write
#      <file>.cov.bed.<win>.<slide>.<cpg>.bedGraph next to the input.
#
# Prompts (read from stdin, in this order): window size, slide size, genome
# directory, minimum CpG count, track colour.
#
# Requires: bedtools (makewindows, intersect, groupby), awk, sed, mktemp.
#
# Scratch files live in a private mktemp directory that is removed on every
# exit path; a pre-existing ./tmp directory is neither used nor deleted.

set -euo pipefail

# Print an error to stderr and abort.
die() {
  printf '\n ERROR: %s\n\n' "$*" >&2
  exit 1
}

# `clear` is purely cosmetic; do not abort when no terminal is attached.
clear || true
printf '\n\n # cov2bedGraph_highCpG # \n\n # This will convert .cov files from "bismark_methylation_extractor" into .bedGraph files \n\n # Methylation percent will be calculated in selected windows containing specified number of CpGs \n\n # Change reference genome directory before start'
printf '\n\n\n\n\n\n <><><> Sliding Window Parameters <><><> \n\n'
printf '\n\n >>> Enter window size in bp : \n\n'
read -r win || die 'no window size given'
printf '\n\n >>> Enter slide size in bp : \n\n'
read -r slide || die 'no slide size given'
printf '\n\n >>> Direcotry containing "genome.fa.fai" \n\n'
read -r genome || die 'no genome directory given'
printf '\n\n >>> Enter minimum CpG count per window; \n\n'
read -r cpg || die 'no minimum CpG count given'
printf '\n\n >>> Set bedGraph color in RGB format (e.g. 255,0,0 = RED; 0,255,0 = GREEN; 0,0,255 = BLUE)\n\n\n'
read -r color || die 'no color given'

# Validate the answers before any work is started.
[[ "$win" =~ ^[1-9][0-9]*$ ]] \
  || die "window size must be a positive integer (got '$win')"
[[ "$slide" =~ ^[1-9][0-9]*$ ]] \
  || die "slide size must be a positive integer (got '$slide')"
[[ "$cpg" =~ ^[0-9]+$ ]] \
  || die "minimum CpG count must be a non-negative integer (got '$cpg')"
[[ "$color" =~ ^[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}$ ]] \
  || die "color must look like R,G,B (got '$color')"

genome_index="${genome%/}/genome.fa.fai"
[[ -r "$genome_index" ]] || die "cannot read '$genome_index'"

command -v bedtools >/dev/null || die 'bedtools not found in PATH'

# Collect inputs; nullglob makes an unmatched pattern expand to nothing
# instead of the literal string "./*.cov".
shopt -s nullglob
cov_files=(./*.cov)
shopt -u nullglob
(( ${#cov_files[@]} > 0 )) || die 'no .cov files in the current directory'

printf '\n\n ()()() RUNNING ()()()\n\n'

# Private scratch directory inside the current directory, always cleaned up.
workdir=$(mktemp -d ./cov2bedGraph_tmp.XXXXXX) || die 'cannot create scratch directory'
trap 'rm -rf -- "$workdir"' EXIT

# make sliding windows
windows="$workdir/$win.$slide.windows"
bedtools makewindows -g "$genome_index" -w "$win" -s "$slide" > "$windows"

for cov in "${cov_files[@]}"; do
  base=${cov##*/}
  selected="$workdir/$base.intersect"
  # Track/file naming kept as "<file>.cov.bed.<win>.<slide>.<cpg>".
  track="$base.bed.$win.$slide.$cpg"
  staged="$workdir/$track.bedGraph"

  # select high CpG windows: -c appends the overlap count as column 4
  bedtools intersect -wa -c -a "$windows" -b "$cov" \
    | awk -v min="$cpg" '$4 >= min' > "$selected"

  {
    # add bedGraph header (values passed as data, never as printf format)
    printf 'track type=bedGraph name=%s description=%s color=%s\n' \
      "$track" "$track" "$color"

    if [[ -s "$selected" ]]; then
      # calculate %met in selected windows
      # NOTE(review): columns 8,9 are kept from the original script. With a
      # 4-column window file followed by a 6-column coverage record the two
      # count columns would be 9,10 - confirm against the actual .cov layout.
      # Windows whose denominator is 0 are skipped instead of crashing awk.
      bedtools intersect -wa -wb -a "$selected" -b "$cov" \
        | bedtools groupby -g 1,2,3 -c 8,9 -o sum,sum \
        | awk 'BEGIN{FS=OFS="\t"} ($4+$5) > 0 {print "chr"$1,$2,$3,100*$4/($4+$5)}' \
        | sed 's/chrMT/chrM/g'
    else
      printf ' WARNING: no window in %s reaches %s CpGs\n' "$base" "$cpg" >&2
    fi
  } > "$staged"

  # Publish only complete files.
  mv -- "$staged" "./$track.bedGraph"
done

clear || true
printf '\n\n >>> DONE! \n\n'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment