Byungkuk Min mbk0asis

mbk0asis / configure R with BLAS

Last active November 16, 2018 00:35

configure R with BLAS

	# Ubuntu 16.04
	# R-3.4.1


	1. Download R source package

	2. Install dependencies
	$ sudo apt-get install fort77 xorg-dev liblzma-dev libblas-dev gfortran gcc-multilib gobjc++ aptitude libreadline-dev

	3. Install 'open-blas'

mbk0asis / R_saving_multiple_plots

Created July 24, 2017 02:54

R_saving_multiple_plots


	# USING "ggplot2" and "for loop"

	# Build a one-column data frame ("types") holding one group label per sample:
	# each label is repeated as many times as the matching entry in `times`.
	types <- data.frame(
	  types = rep(
	    c("breast.C", "breast.N",
	      "colon.C", "colon.N",
	      "kidney.C", "kidney.N",
	      "liver.C", "liver.N",
	      "lung.C", "lung.N",
	      "prostate.C", "prostate.N"),
	    times = c(793, 97,
	              313, 38,
	              324, 160,
	              377, 50,
	              473, 32,
	              502, 50)
	  )
	)

mbk0asis / various boxplots

Created July 25, 2017 01:45

	# Load the boxplot input table. Two alternative inputs are listed; both are
	# assigned to `dta`, so the second read replaces the result of the first.
	# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
	dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/27k_Set2/liver/liver.set2.boxplot..csv",
	header = TRUE)
	dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = TRUE)
	# Report rows x columns of the table that was read last.
	dim(dta)

	##################
	types <- data.frame(c(rep("breast.C",793),rep("breast.N",97),
	rep("colon.C",313),rep("colon.N",38),
	rep("kidney.C",324),rep("kidney.N",160),
	rep("liver.C",377),rep("liver.N",50),

mbk0asis / One-way ANOVA in R

Last active October 12, 2017 01:51

	# data loading
	# The first line of the CSV is used as column names; `TRUE` is spelled out
	# because `T` is an ordinary variable that can be reassigned.
	dta <- read.csv("file:///C:/Users/bk/Desktop/test2.csv", header = TRUE)
	dta

	# boxplot data
	# One box per level of `group`, showing the distribution of `count`.
	library(ggplot2)
	ggplot(dta, aes(x = group, y = count)) +
	  geom_boxplot()

	# linear regression model for data

mbk0asis / Adding a column of row means to the original data frame

Last active November 7, 2017 09:26


	# Append a column "Mean" to dta holding the mean of each row.
	# rowMeans() needs every column of dta to be numeric, and the mean is taken
	# over all columns present at call time.
	> dta$Mean <- rowMeans(dta)

mbk0asis / linear modeling

Last active November 9, 2017 09:02


	# Scatter plot of dta$a against dta$b with a fitted regression line on top

	# scatter plot, annotated with the Pearson correlation
	library(LSD)
	heatscatter(x = dta$a, y = dta$b, cor = TRUE, method = "pearson")

	# regression line: the formula is written response ~ predictor, so the
	# column order is swapped relative to the plot call (b ~ a)
	abline(reg = lm(dta$b ~ dta$a))

mbk0asis / kmeans clustering, R

Last active August 24, 2020 07:35

	# Read the input table; the first CSV column becomes the row names.
	dta <- read.csv("file:///C:/Users/bk/Desktop/TEST_data.csv", header = TRUE, row.names = 1)
	head(dta)

	# NOTE(review): `cntNorm` is not created in this snippet; presumably it is a
	# normalised version of `dta` produced elsewhere. Confirm before running.
	# Partition the rows of cntNorm into 6 clusters. kmeans() starts from random
	# centres, so cluster numbering can differ between runs.
	cl <- kmeans(cntNorm, 6)
	cluster <- cl$cluster
	table(cluster)  # number of rows assigned to each cluster

	# Fixed typo: the original read rownames(cnrNorm), an object that does not exist.
	rNames <- rownames(cntNorm)
	df <- data.frame(cntNorm, rNames, cluster) # attach cluster info on the data frame
	head(df)

mbk0asis / deepTools

Last active April 6, 2018 05:24

	# Computing matrix
	# Regions come from mm10/$1.bed ($1 is the first positional argument when this
	# runs inside a script). Each region body is scaled to 10000 bp
	# (--regionBodyLength) with 5000 bp added on both sides (-a / -b), and the
	# three bigWig tracks are summarised in 10 bp bins (-bs).
	computeMatrix scale-regions -S Muscle.2.bin500.bw Muscle.20.bin500.bw Muscle.28.bin500.bw \
	--skipZeros -R mm10/$1.bed -o Muscle.$1.bin500.matrix --outFileNameMatrix Muscle.$1.bin500.matrix.tsv \
	-bs 10 -a 5000 -b 5000 --regionBodyLength 10000 \
	--blackListFileName blackList.bed # you may mask specific regions with "black list"

	# Plotting heatmap
	# Render the matrix written by computeMatrix as a PNG with a white-to-black colour scale.
	plotHeatmap --matrixFile Muscle.$1.bin500.matrix \
	--outFileName Muscle.$1.bin500.Heatmap.png \
	--colorList 'white,black' # --zMax 10

	# Plotting profile

mbk0asis / ggplot with p-values

Created December 20, 2017 12:52

	library(ggplot2)
	library(ggpubr)

	# NOTE: setwd() ties the script to this machine; the relative file name
	# below is resolved against this directory.
	setwd("/home/bio0/00-NGS/SETDB1_TCGA")

	# The file is read with header = FALSE (its first line is treated as data),
	# so column names are assigned explicitly afterwards. `FALSE` is spelled out
	# because `F` is an ordinary variable that can be reassigned.
	dta <- read.csv("exp.LUNG.FPKM.EpiStem.ggplot.2.csv", header = FALSE)
	colnames(dta) <- c("Gene","Symbol","Sample","FPKM","Group")
	names(dta)

	# Keep the rows whose Symbol matches the pattern "DNMT1". grep() does a regex
	# match, so any symbol that merely contains "DNMT1" is kept as well.
	dta2 <- dta[grep("DNMT1", dta$Symbol), ]

mbk0asis / CpG density calculator - bash

Last active March 4, 2018 08:37

	# To calculate CpG density and distribution in repeat elements (ERVs, LINEs, etc.).

	## extract information of repeats from repeatMasker database
	$ zcat hg38.repeat.masker.txt.gz | head
	#bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEnd repLeft id
	0 1892 83 59 14 chr1 67108753 67109046 -181847376 + L1P5 LINE L1 5301 5607 -544 1
	1 2582 27 0 23 chr1 8388315 8388618 -240567804 - AluY SINE Alu -15 296 1 1
	1 4085 171 77 36 chr1 25165803 25166380 -223790042 + L1MB5 LINE L1 5567 6174 0 4
	1 2285 91 0 13 chr1 33554185 33554483 -215401939 - AluSc SINE Alu -6 303 10 6
	1 2451 64 3 26 chr1 41942894 41943205 -207013217 - AluY SINE Alu -7 304 1 8