Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 18:41 (UTC +09:00)
View GitHub Profile
library(ggplot2)
library(ggpmisc)
# Base scatter plot of log2(high_mean) on x against log2(low_mean) on y.
# Columns are referenced by bare name inside aes() rather than df2$..., so
# the mapping is evaluated against the data passed to ggplot().
# Axis titles are set with labs(): xlab/ylab are not arguments of ggplot(),
# so passing them there did not label the axes.
p <- ggplot(df2, aes(x = log2(high_mean), y = log2(low_mean))) +
  labs(x = "log2(Setdb1_high)", y = "log2(Setdb1_low)")
p + xlim(0,10) + ylim(0,10)+ theme_bw() +
geom_point(size=1,alpha=.3,col="black") +
stat_density2d(aes(fill=..level..,alpha=..level..),geom='polygon',colour='grey50',bins=10, show.legend = F) +
scale_fill_continuous(low="yellow",high="red") +
@mbk0asis
mbk0asis / cufflinks.txt
Last active January 19, 2018 01:19
cufflinks pipeline
library(ggplot2)
# Load the log10/log2 mean table from CSV.
All <- read.csv(
  "~/00-NGS/RNAseq/bov/niceM/Sham_Corrected/TopHat_UMD3.1_NCBI/log10_log2_mean.csv"
)

# Replace the header with fixed column names: chromosome, the four value
# columns (MI, MN, FI, FN), then the log10 and log2 columns for MN, FI and
# FN relative to MI (presumably ratios -- confirm against the CSV).
colnames(All) <- c(
  "chr", "MI", "MN", "FI", "FN",
  "log10_MN_MI", "log10_FI_MI", "log10_FN_MI",
  "log2_MN_MI", "log2_FI_MI", "log2_FN_MI"
)

# Preview the first rows to check the import.
head(All)
# set chr in natural order
All$chr <- factor(All$chr,
levels=c("1","2","3","4","5","6","7","8","9","10",
chr MI MN FI FN log10_MN.MI log10_FI.MI log10_FN.MI log2_MN.MI log2_FI.MI log2_FN.MI
1 4.274425 6.348213 5.055983 4.505403 0.1440066 0.06000955 0.01861402 0.4783797 0.1993474 0.06183444
1 0.000000 0.000000 0.000000 0.000000 0.0000000 0.00000000 0.00000000 0.0000000 0.0000000 0.00000000
1 147.443667 73.833702 99.568167 55.867500 -0.2974644 -0.16910114 -0.41669754 -0.9881555 -0.5617418 -1.38423925
1 5.123198 7.764905 3.524147 3.185655 0.1557689 -0.13144165 -0.16521490 0.5174530 -0.4366397 -0.54883201
1 1.217480 2.133620 1.641105 2.506605 0.1501866 0.07592596 0.19902714 0.4989091 0.2522206 0.66115384
1 8.494963 4.504353 7.199487 7.713527 -0.2367870 -0.06370663 -0.03729933 -0.7865893 -0.2116288 -0.12390568
# Volcano plot of SCNT_TSA: log2FoldChange on x, -log10(pval) on y.
# Every row is drawn first as a faint grey dot.
with(
  SCNT_TSA,
  plot(
    log2FoldChange, -log10(pval),
    pch = 20, main = "Volcano plot",
    col = rgb(0.6, 0.6, 0.6, alpha = 0.1)
  )
)

# Overlay in orange the rows with pval < 0.05 and |log2FoldChange| > 1.
with(
  subset(SCNT_TSA, pval < .05 & abs(log2FoldChange) > 1),
  points(
    log2FoldChange, -log10(pval),
    pch = 20, col = rgb(1, 0.5, 0, alpha = 0.4)
  )
)

# Guide lines: the +/-1 fold-change cut-offs, zero fold change, and the
# pval = 0.05 threshold. The horizontal line uses h = explicitly instead of
# an (intercept, slope) coefficient vector, and the zero line is drawn once.
abline(v = c(-1, 1), col = "blue", lwd = 1, lty = 5)
abline(v = 0, col = "grey40", lwd = 1, lty = 2)
abline(h = -log10(0.05), col = "blue", lwd = 1, lty = 5)
@mbk0asis
mbk0asis / gist:fda88ab6f8e8cea85023
Last active November 16, 2015 02:47
counting CpG in every line
# For every line of Input_File, write the number of matches of the pattern
# on that line to Result_file (one count per input line).
# 'IFS= read -r' keeps each line verbatim (no backslash processing or
# whitespace trimming); printf with a quoted "$line" avoids word splitting,
# glob expansion and echo's option parsing.
$ while IFS= read -r line; do printf '%s\n' "$line" | grep -o 'Any_Pattern' | wc -l; done < Input_File > Result_file
# Example
$ while IFS= read -r line; do printf '%s\n' "$line" | grep -o 'CG' | wc -l; done < Dlk1_mm10.txt > Dlk1_CpG_density
@mbk0asis
mbk0asis / MA plots
Last active November 24, 2015 04:18
library(cummeRbund)
# Switch to the directory that holds the cuffdiff output.
setwd("/home/bio3/00-NGS/RNAseq/E-MTAB-3037_human_aging_fibroblasts/diffout")
# Load the cuffdiff results; called with no arguments, so it relies on the
# working directory set above.
cuff_data <- readCufflinks()
# DEGs
#############################################################################################
# The command-line syntax changed drastically in GATK 4.0.
# To run 'MarkDuplicates' (the tool name is case-sensitive), simply
$ /dir/to/gatk MarkDuplicates --INPUT test.bam --OUTPUT test.dedupped.bam [options]
# originally (picard-tools)
$ picard-tools MarkDuplicates I=test.bam O=test.dedupped.bam [options]
@mbk0asis
mbk0asis / creating_mutant_genomes.txt
Last active February 5, 2016 06:29
creating mutant genomes
# Change the chromosome order of a VCF using VCFtools (vcf-sort).
# 1) Show the current chromosome order (header lines starting with '#' are skipped).
$ grep -v '^#' variants.vcf | cut -f 1 | uniq
# 2) Sort; -c requests chromosomal (natural) ordering.
$ vcf-sort -c variants.vcf > variants_sorted.vcf
# 3) Confirm the new order by inspecting the SORTED file, not the input.
$ grep -v '^#' variants_sorted.vcf | cut -f 1 | uniq
# download chr fasta files
# Files used here
All_COSMIC_Genes.fasta - mRNA sequences for all cancer genes (normal)
Pongo_abelii.Ensembl.PPYG2.cdna.all.fa - Orangutan mRNAs
CosmicCodingMuts.vcf - mutation information
ID conversion file - Ensembl transcript IDs and gene symbols from BioMart (e.g. mart_export.txt)
1. Select high frequency MUT cancer genes (cnt >= 10)
$ cat CosmicCodingMuts_SORTED.vcf | grep -v "#" | grep -v SNV | cut -f 1,2,4,5,8 | \
sed 's/;/\t/g' | cut -f 1,2,3,4,5,6,9,11 | sed 's/=/\t/g' | cut -f 1,2,3,4,6,8,10,12 | \