Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mbk0asis/44b01bb5fcef45b7634ba9de115ad0eb to your computer and use it in GitHub Desktop.
Save mbk0asis/44b01bb5fcef45b7634ba9de115ad0eb to your computer and use it in GitHub Desktop.
$ trim_galore *
$ bismark_genome_preparation --bowtie2 /genome/dir/
$ bismark -u 10000 --non_directional --bowtie2 -p 8 path/to/BSgenome/dir/ Read.fq --score_min L,0,-1
# '-u' = optional option, map only randomly chosen 10000 reads
# bismark_methylation_extractor
$ for b in ./*.rmdup.bam; do bismark_methylation_extractor --bedGraph --multicore 8 $b ; done
# "--bedGraph" option produces .bedGraph and .cov files
# .bedGraph (chr,CpG location,% methyl)
track type=bedGraph
10 79 80 0
10 102 103 25
10 124 125 0
10 154 155 0
10 170 171 0
10 216 217 0
10 217 218 50
10 239 240 0
10 240 241 0
# .cov (chr,CpG location,% methyl,methylated cnt,unmethylated cnt)
10 80 80 0 0 3
10 103 103 25 1 3
10 125 125 0 0 2
10 155 155 0 0 3
10 171 171 0 0 2
10 217 217 0 0 5
10 218 218 50 1 1
10 240 240 0 0 4
10 241 241 0 0 3
10 263 263 25 1 3
########################################################
# sliding window analysis with .cov file
# BEDTOOLS - intersectBed, groupBy
# make 10kb windows sliding 5kb
$ bedtools makewindows -g ~/00-NGS/Annotation/Bos_taurus_NCBI_UMD_3.1.1/Bowtie2Index/genome.fa.fai \
-w 10000 -s 5000 > umd311_10k_5K_windows
# find intersecting regions (intersectBed; -wa -wb print all columns in file a and b)
# sum read counts (groupBy)
$ intersectBed -a umd311_10k_5K_windows -b IVF.ICM.03.sorted.bam.rmdup.bismark.cov -wa -wb \
| groupBy -g 1,2,3 -c 8,9 -o sum | awk 'BEGIN{FS=OFS="\t"}{print "chr"$1,$2,$3,$4/($4+$5)*100}' \
|sed 's/chrMT/chrM/g' > IVF.ICM.10k.5k
# Result
chr start end %met
chr10 0 10000 6.85096
chr10 5000 15000 13.5496
chr10 10000 20000 33.9535
chr10 15000 25000 45.8824
chr10 20000 30000 40.0794
# Add bedGraph header to upload on UCSC genome browser
$ echo "track type=bedGraph name=IVF.ICM.10k.5k description=IVF.ICM.10k.5k color=255,10,10" \
| cat - IVF.ICM.10k.5k > IVF.ICM.10k.5k.bed
###################################################################
# sliding window analysis - CpG counts per window
#
# count CpG counts per window --> select windows with >=10 CpGs --> save as 'bed'
$ intersectBed -wa -a umd311_1k.500_windows -b IVF.ICM.03.sorted.bedGraph -c \
| awk '$4 >= 10' > IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed
# result bed file
chr start end CpG/window
10 0 1000 32
10 500 1500 24
10 1000 2000 39
10 1500 2500 41
10 2000 3000 33
# find interseting windows with .cov file
$ intersectBed -wa -wb \
-a IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed \
-b IVF.ICM.03.sorted.bismark.cov \
| groupBy -g 1,2,3,4 -c 9,10 -o sum
> IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed
# chr start end CpG_cnt met unmet
10 0 1000 32 17 137
10 500 1500 24 2 54
10 1000 2000 39 4 223
10 1500 2500 41 16 297
10 2000 3000 33 14 143
# calculate %met
$ awk 'BEGIN{FS=OFS="\t"}{print "chr"$1,$2,$3,100*$4/($4+$5)}' input | sed 's/chrMT/chrM/g' > output
#chr start end %met
chr10 0 1000 65.3061
chr10 500 1500 92.3077
chr10 1000 2000 90.6977
chr10 1500 2500 71.9298
chr10 2000 3000 70.2128
chr10 2500 3500 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment