Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 16:17 (UTC +09:00)
View GitHub Profile
@mbk0asis
mbk0asis / configure R with BLAS
Last active November 16, 2018 00:35
configure R with BLAS
# Ubuntu 16.04
# R-3.4.1
1. Download R source package
2. Install dependencies
$ sudo apt-get install fort77 xorg-dev liblzma-dev libblas-dev gfortran gcc-multilib gobjc++ aptitude libreadline-dev
3. Install 'open-blas'
@mbk0asis
mbk0asis / R_saving_multiple_plots
Created July 24, 2017 02:54
R_saving_multiple_plots
# USING "ggplot2" and "for loop"
# One row per sample: each "<tissue>.<C|N>" label is repeated once for every
# sample in that group, in the order the groups are listed here.
types <- data.frame(
  types = rep(
    c(
      "breast.C", "breast.N",
      "colon.C", "colon.N",
      "kidney.C", "kidney.N",
      "liver.C", "liver.N",
      "lung.C", "lung.N",
      "prostate.C", "prostate.N"
    ),
    times = c(
      793, 97,
      313, 38,
      324, 160,
      377, 50,
      473, 32,
      502, 50
    )
  )
)
# Read the liver set-2 boxplot table; the first line of the file is the header.
dta <- read.csv(
  "file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/27k_Set2/liver/liver.set2.boxplot..csv",
  header = TRUE
)
# NOTE(review): this second read overwrites `dta`, so only the 423-CpG table is
# in use from here on. Presumably the two reads are alternative inputs and one
# of them is meant to be commented out -- confirm which.
dta <- read.csv(
  "file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv",
  header = TRUE
)
# Show the number of rows and columns of the table that was loaded last.
dim(dta)
##################
types <- data.frame(c(rep("breast.C",793),rep("breast.N",97),
rep("colon.C",313),rep("colon.N",38),
rep("kidney.C",324),rep("kidney.N",160),
rep("liver.C",377),rep("liver.N",50),
# data loading
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
dta <- read.csv("file:///C:/Users/bk/Desktop/test2.csv", header = TRUE)
dta # print the loaded table for a quick visual check
# boxplot data: one box per value of column `group`, showing column `count`
library(ggplot2)
ggplot(dta, aes(x = group, y = count)) +
  geom_boxplot()
# Scatter plot of column `b` against column `a` with a fitted regression line.
library(LSD)
# Scatter plot from LSD; `cor = TRUE` with `method = "pearson"` presumably adds
# the Pearson correlation to the plot title -- confirm against the LSD docs.
heatscatter(x = dta$a, y = dta$b, cor = TRUE, method = "pearson")
# Overlay the least-squares line of b on a.
# To regress the other way round, swap `a` and `b` here and in the call above.
abline(reg = lm(dta$b ~ dta$a))
# Load the table; the first column of the file supplies the row names.
dta <- read.csv(
  "file:///C:/Users/bk/Desktop/TEST_data.csv",
  header = TRUE,
  row.names = 1
)
head(dta)
# NOTE(review): `cntNorm` is not created anywhere in this snippet; presumably
# it is a normalised version of `dta` built in a step not shown -- confirm.
# Partition the rows of `cntNorm` into 6 clusters with k-means.
cl <- kmeans(cntNorm, 6)
cluster <- cl$cluster
# Number of rows assigned to each cluster.
table(cluster)
# Fixed: the original read `rownames(cnrNorm)`, a typo for `cntNorm` that
# fails with "object 'cnrNorm' not found".
rNames <- rownames(cntNorm)
df <- data.frame(cntNorm, rNames, cluster) # attach cluster info on the data frame
head(df)
# Computing matrix
# $1 is the region-set name: regions are read from mm10/$1.bed and every output
# file name embeds it. The expansions are quoted so a name containing spaces or
# glob characters is passed through as a single argument.
computeMatrix scale-regions -S Muscle.2.bin500.bw Muscle.20.bin500.bw Muscle.28.bin500.bw \
--skipZeros -R "mm10/$1.bed" -o "Muscle.$1.bin500.matrix" --outFileNameMatrix "Muscle.$1.bin500.matrix.tsv" \
-bs 10 -a 5000 -b 5000 --regionBodyLength 10000 \
--blackListFileName blackList.bed # you may mask specific regions with "black list"
# (the option above previously began with "-" + an en-dash instead of "--",
# so it was not recognised as an option)
# Plotting heatmap
plotHeatmap -m "Muscle.$1.bin500.matrix" -out "Muscle.$1.bin500.Heatmap.png" --colorList 'white,black' # --zMax 10
# Plotting profile
library(ggplot2)
library(ggpubr)
# NOTE(review): setwd() ties the script to one machine; it is kept because
# code outside this snippet may rely on the working directory.
setwd("/home/bio0/00-NGS/SETDB1_TCGA")
# The file has no header row, so column names are assigned right after reading.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
dta <- read.csv("exp.LUNG.FPKM.EpiStem.ggplot.2.csv", header = FALSE)
colnames(dta) <- c("Gene", "Symbol", "Sample", "FPKM", "Group")
names(dta)
# Keep the rows whose Symbol contains "DNMT1". grep() matches substrings, so
# any symbol that merely contains "DNMT1" is kept as well.
dta2 <- dta[grep("DNMT1", dta$Symbol), ]
# To calculate CpG density and distribution in repeat elements (ERVs, LINEs, etc.)
## extract information of repeats from repeatMasker database
$ zcat hg38.repeat.masker.txt.gz | head
#bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEnd repLeft id
0 1892 83 59 14 chr1 67108753 67109046 -181847376 + L1P5 LINE L1 5301 5607 -544 1
1 2582 27 0 23 chr1 8388315 8388618 -240567804 - AluY SINE Alu -15 296 1 1
1 4085 171 77 36 chr1 25165803 25166380 -223790042 + L1MB5 LINE L1 5567 6174 0 4
1 2285 91 0 13 chr1 33554185 33554483 -215401939 - AluSc SINE Alu -6 303 10 6
1 2451 64 3 26 chr1 41942894 41943205 -207013217 - AluY SINE Alu -7 304 1 8