Last active
June 3, 2016 04:23
-
-
Save mbk0asis/44b01bb5fcef45b7634ba9de115ad0eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ trim_galore * | |
$ bismark_genome_preparation --bowtie2 /genome/dir/ | |
$ bismark -u 10000 --non_directional --bowtie2 -p 8 path/to/BSgenome/dir/ Read.fq --score_min L,0,-1 | |
# '-u' = optional option, map only randomly chosen 10000 reads | |
# bismark_methylation_extractor | |
$ for b in ./*.rmdup.bam; do bismark_methylation_extractor --bedGraph --multicore 8 $b ; done | |
# "--bedGraph" option produces .bedGraph and .cov files | |
# .bedGraph (chr,CpG location,% methyl) | |
track type=bedGraph | |
10 79 80 0 | |
10 102 103 25 | |
10 124 125 0 | |
10 154 155 0 | |
10 170 171 0 | |
10 216 217 0 | |
10 217 218 50 | |
10 239 240 0 | |
10 240 241 0 | |
# .cov (chr,CpG location,% methyl,methylated cnt,unmethylated cnt) | |
10 80 80 0 0 3 | |
10 103 103 25 1 3 | |
10 125 125 0 0 2 | |
10 155 155 0 0 3 | |
10 171 171 0 0 2 | |
10 217 217 0 0 5 | |
10 218 218 50 1 1 | |
10 240 240 0 0 4 | |
10 241 241 0 0 3 | |
10 263 263 25 1 3 | |
######################################################## | |
# sliding window analysis with .cov file | |
# BEDTOOLS - intersectBed, groupBy | |
# make 10kb windows sliding 5kb | |
$ bedtools makewindows -g ~/00-NGS/Annotation/Bos_taurus_NCBI_UMD_3.1.1/Bowtie2Index/genome.fa.fai \ | |
-w 10000 -s 5000 > umd311_10k_5K_windows | |
# find intersecting regions (intersectBed; -wa -wb print all columns in file a and b) | |
# sum read counts (groupBy) | |
$ intersectBed -a umd311_10k_5K_windows -b IVF.ICM.03.sorted.bam.rmdup.bismark.cov -wa -wb \ | |
| groupBy -g 1,2,3 -c 8,9 -o sum | awk 'BEGIN{FS=OFS="\t"}{print "chr"$1,$2,$3,$4/($4+$5)*100}' \ | |
|sed 's/chrMT/chrM/g' > IVF.ICM.10k.5k | |
# Result | |
chr start end %met | |
chr10 0 10000 6.85096 | |
chr10 5000 15000 13.5496 | |
chr10 10000 20000 33.9535 | |
chr10 15000 25000 45.8824 | |
chr10 20000 30000 40.0794 | |
# Add bedGraph header to upload on UCSC genome browser | |
$ echo "track type=bedGraph name=IVF.ICM.10k.5k description=IVF.ICM.10k.5k color=255,10,10" \ | |
| cat - IVF.ICM.10k.5k > IVF.ICM.10k.5k.bed | |
################################################################### | |
# sliding window analysis - CpG counts per window | |
# | |
# count CpG counts per window --> select windows with >=10 CpGs --> save as 'bed' | |
$ intersectBed -wa -a umd311_1k.500_windows -b IVF.ICM.03.sorted.bedGraph -c \ | |
| awk '$4 >= 10' > IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed | |
# result bed file | |
chr start end CpG/window | |
10 0 1000 32 | |
10 500 1500 24 | |
10 1000 2000 39 | |
10 1500 2500 41 | |
10 2000 3000 33 | |
# find interseting windows with .cov file | |
$ intersectBed -wa -wb \ | |
-a IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed \ | |
-b IVF.ICM.03.sorted.bismark.cov \ | |
| groupBy -g 1,2,3,4 -c 9,10 -o sum | |
> IVF.ICM.03.sorted.bam.1k.500.over10cpg.bed | |
# chr start end CpG_cnt met unmet | |
10 0 1000 32 17 137 | |
10 500 1500 24 2 54 | |
10 1000 2000 39 4 223 | |
10 1500 2500 41 16 297 | |
10 2000 3000 33 14 143 | |
# calculate %met | |
$ awk 'BEGIN{FS=OFS="\t"}{print "chr"$1,$2,$3,100*$4/($4+$5)}' input | sed 's/chrMT/chrM/g' > output | |
#chr start end %met | |
chr10 0 1000 65.3061 | |
chr10 500 1500 92.3077 | |
chr10 1000 2000 90.6977 | |
chr10 1500 2500 71.9298 | |
chr10 2000 3000 70.2128 | |
chr10 2500 3500 100 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment