Byungkuk Min mbk0asis

mbk0asis / ggplot2 - scatter plot

Last active May 10, 2018 02:51

	library(ggplot2)
	library(ggpmisc)

	# Base scatter plot: log2 of high_mean on x against log2 of low_mean on y.
	# Columns are named bare inside aes() so they are looked up in df2, and the
	# axis titles are attached with labs(): ggplot() does not use xlab=/ylab=
	# arguments, so passing them there never labelled the axes.
	p <- ggplot(df2, aes(log2(high_mean), log2(low_mean))) +
	  labs(x = "log2(Setdb1_high)", y = "log2(Setdb1_low)")

	p + xlim(0,10) + ylim(0,10)+ theme_bw() +
	geom_point(size=1,alpha=.3,col="black") +
	stat_density2d(aes(fill=..level..,alpha=..level..),geom='polygon',colour='grey50',bins=10, show.legend = F) +
	scale_fill_continuous(low="yellow",high="red") +

mbk0asis / cufflinks.txt

Last active January 19, 2018 01:19

cufflinks pipeline


	# gather "accepted_hits.bam" to one directory
	$ find . -name "accepted_hits.bam" > bam_list
	$ while read line; do cp $line $(dirname $line).bam ; done < bam_list


	# rename bam files to shorten the names (e.g. SAMPLE.trim.cut.fq.gz.bam --> SAMPLE.bam)
	$ rename 's/trim.cut.fq.gz.//g' *

mbk0asis / ggplot2 geom_jitter

Last active September 1, 2015 04:15

	library(ggplot2)
	# Read the CSV and replace the header taken from the file with the
	# column names given here.
	All <- setNames(
	  read.csv("~/00-NGS/RNAseq/bov/niceM/Sham_Corrected/TopHat_UMD3.1_NCBI/log10_log2_mean.csv"),
	  c(
	    "chr", "MI", "MN", "FI", "FN",
	    "log10_MN_MI", "log10_FI_MI", "log10_FN_MI",
	    "log2_MN_MI", "log2_FI_MI", "log2_FN_MI"
	  )
	)
	# Show the first rows to confirm the import and the new names.
	head(All)

	# set chr in natural order
	All$chr <- factor(All$chr,
	levels=c("1","2","3","4","5","6","7","8","9","10",

mbk0asis / data for jitter plot

Last active August 31, 2015 09:34

	chr MI MN FI FN log10_MN.MI log10_FI.MI log10_FN.MI log2_MN.MI log2_FI.MI log2_FN.MI
	1 4.274425 6.348213 5.055983 4.505403 0.1440066 0.06000955 0.01861402 0.4783797 0.1993474 0.06183444
	1 0.000000 0.000000 0.000000 0.000000 0.0000000 0.00000000 0.00000000 0.0000000 0.0000000 0.00000000
	1 147.443667 73.833702 99.568167 55.867500 -0.2974644 -0.16910114 -0.41669754 -0.9881555 -0.5617418 -1.38423925
	1 5.123198 7.764905 3.524147 3.185655 0.1557689 -0.13144165 -0.16521490 0.5174530 -0.4366397 -0.54883201
	1 1.217480 2.133620 1.641105 2.506605 0.1501866 0.07592596 0.19902714 0.4989091 0.2522206 0.66115384
	1 8.494963 4.504353 7.199487 7.713527 -0.2367870 -0.06370663 -0.03729933 -0.7865893 -0.2116288 -0.12390568

mbk0asis / volcano_plot_R

Last active September 25, 2015 07:35

	# Volcano plot of SCNT_TSA: log2FoldChange on x, -log10(pval) on y.
	# Every row is drawn first as a faint grey dot.
	with(
	  SCNT_TSA,
	  plot(
	    x = log2FoldChange, y = -log10(pval),
	    pch = 20, main = "Volcano plot",
	    col = rgb(0.6, 0.6, 0.6, alpha = 0.1)
	  )
	)

	# Overlay in orange the rows with pval < 0.05 and |log2FoldChange| > 1.
	# which() leaves out rows where the condition is NA, as subset() does.
	with(SCNT_TSA, {
	  sig <- which(pval < .05 & abs(log2FoldChange) > 1)
	  points(
	    log2FoldChange[sig], -log10(pval[sig]),
	    pch = 20, col = rgb(1, 0.5, 0, alpha = 0.4)
	  )
	})

	# Reference lines: fold-change cut-offs at -1 and 1, the zero line, and the
	# p = 0.05 threshold drawn as a horizontal line at -log10(0.05).
	abline(v = c(-1, 1), col = "blue", lwd = 1, lty = 5)
	abline(v = c(0, 0), col = "grey40", lwd = 1, lty = 2)
	abline(h = -log10(0.05), col = "blue", lwd = 1, lty = 5)

mbk0asis / gist:fda88ab6f8e8cea85023

Last active November 16, 2015 02:47

counting CpG in every line


	$ while read line; do echo $line | grep -o 'Any_Pattern' | wc -l; done < Input_File > Result_file

	# Example
	$ while read line; do echo $line | grep -o 'CG' | wc -l; done < Dlk1_mm10.txt > Dlk1_CpG_density

mbk0asis / MA plots

Last active November 24, 2015 04:18

	library(cummeRbund)

	# Switch to the directory given below so that readCufflinks(), called
	# without arguments, runs from there.
	setwd("/home/bio3/00-NGS/RNAseq/E-MTAB-3037_human_aging_fibroblasts/diffout")

	# Load the cuffdiff results into cuff_data.
	cuff_data <- readCufflinks()

	# DEGs

mbk0asis / Calling Variants in RNAseq - STAR,PICARD,GATK

Last active July 26, 2024 11:42

	#############################################################################################
	# Commands in GATK 4.0 drastically changed.

	# to run 'MarkDuplicates', simply
	$ /dir/to/gatk MarkDuplicates --INPUT test.bam --OUTPUT test.dedupped.bam [options]

	# originally
	$ picard-tools MarkDuplicates I=test.bam O=test.dedupped.bam [options]

mbk0asis / creating_mutant_genomes.txt

Last active February 5, 2016 06:29

creating mutant genomes


	# reorder the chromosomes in a VCF using VCFtools ('vcf-sort')
	# check the chromosome order before and after sorting
	$ cat variants.vcf | cut -f 1 | uniq
	$ vcf-sort -c variants.vcf > variants_sorted.vcf
	$ cat variants_sorted.vcf | cut -f 1 | uniq



	# download chr fasta files

mbk0asis / multiplex PCR target sites

Last active December 10, 2016 08:42

	# Files used here
	All_COSMIC_Genes.fasta - mRNA sequences for all cancer genes (normal)
	Pongo_abelii.Ensembl.PPYG2.cdna.all.fa - Orangutan mRNAs
	CosmicCodingMuts.vcf - mutation information
	ID conversion file - Ensembl transcript IDs and gene symbols from BioMart (e.g. mart_export.txt)


	1. Select high frequency MUT cancer genes (cnt >= 10)
	$ cat CosmicCodingMuts_SORTED.vcf \| grep -v "#" \| grep -v SNV \| cut -f 1,2,4,5,8 \| \
	sed 's/;/\t/g' \| cut -f 1,2,3,4,5,6,9,11 \| sed 's/=/\t/g' \| cut -f 1,2,3,4,6,8,10,12 \| \