Byungkuk Min mbk0asis

mbk0asis / plotDE.py

Last active March 19, 2018 00:09

extract count data from stringtie output - copied from <http://www.ccb.jhu.edu/software/stringtie/dl/prepDE.py>

	#!/usr/bin/env python2
	import re, csv, sys, os, glob, warnings, itertools
	from math import ceil
	from optparse import OptionParser
	from operator import itemgetter

	MIN_PYTHON = (2, 7)
	if sys.version_info < MIN_PYTHON:
	sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)

mbk0asis / HISAT2-STRINGTIE-BALLGOWN

Created March 19, 2018 00:07

HISAT2-STRINGTIE-BALLGOWN pipeline


	# For RNA-seq reads, use "--dta/--downstream-transcriptome-assembly"
	# the command is similar to 'bowtie2'

	$ hisat2 -x genome -1 read1.fq.gz -2 read2.fq.gz -S Sample.sam -p 8 --dta 2>&1 \| tee $l.stat

	$ samtools view -bS Sample.sam \| samtools sort -@ 8 - Sample.sorted


	# The bam files can be fed into other pipelines e.g DESeq, edgeR, and etc

mbk0asis / CpG density calculator - R code

Last active March 4, 2018 08:50

	setwd("/home/bk/Desktop/hg38")

	dta <- read.csv("L1.6kb.CpG.rel.pos.csv",header = F, sep = ",")
	dim(dta)
	dta.t <- t(dta[,-1])
	dta.t <- dta.t[,colSums(is.na(dta.t))<nrow(dta.t)]
	nc <- ncol(dta.t)

	res <- lapply(1:nc,function(i) {
	h<-hist(dta.t[,i], plot=F, breaks = 10)

mbk0asis / CpG density calculator - bash

Last active March 4, 2018 08:37

	# To caluculate CpG density and distribution in repeat elements. (ERVs, LINE, and etc.)

	## extract information of repeats from repeatMasker database
	$ zcat hg38.repeat.masker.txt.gz \| head
	#bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEndrepLeft id
	0 1892 83 59 14 chr1 67108753 67109046 -181847376 + L1P5 LINE L1 5301 5607 -544 1
	1 2582 27 0 23 chr1 8388315 8388618 -240567804 - AluY SINE Alu -15 296 1 1
	1 4085 171 77 36 chr1 25165803 25166380 -223790042 + L1MB5 LINE L1 5567 6174 0 4
	1 2285 91 0 13 chr1 33554185 33554483 -215401939 - AluSc SINE Alu -6 303 10 6
	1 2451 64 3 26 chr1 41942894 41943205 -207013217 - AluY SINE Alu -7 304 1 8

mbk0asis / ggplot with p-values

Created December 20, 2017 12:52

	library(ggplot2)
	library(ggpubr)

	setwd("/home/bio0/00-NGS/SETDB1_TCGA")

	dta <- read.csv("exp.LUNG.FPKM.EpiStem.ggplot.2.csv", header = F)
	colnames(dta) <- c("Gene","Symbol","Sample","FPKM","Group")
	names(dta)

	dta2 <- dta[grep("DNMT1", dta$Symbol), ]

mbk0asis / deepTools

Last active April 6, 2018 05:24

	# Computing matrix
	computeMatrix scale-regions -S Muscle.2.bin500.bw Muscle.20.bin500.bw Muscle.28.bin500.bw \
	--skipZeros -R mm10/$1.bed -o Muscle.$1.bin500.matrix --outFileNameMatrix Muscle.$1.bin500.matrix.tsv \
	-bs 10 -a 5000 -b 5000 --regionBodyLength 10000 \
	-–blackListFileName blackList.bed # you may mask specific regions with "black list"

	# Plotting heatmap
	plotHeatmap -m Muscle.$1.bin500.matrix -out Muscle.$1.bin500.Heatmap.png --colorList 'white,black' # --zMax 10

	# Plotting profile

mbk0asis / kmeans clustering, R

Last active August 24, 2020 07:35

	dta <- read.csv("file:///C:/Users/bk/Desktop/TEST_data.csv",header = T, row.names = 1)
	head(dta)
	cl <- kmeans(cntNorm,6)
	cluster<-cl$cluster
	table(cluster)

	rNames <- rownames(cnrNorm)
	df<-data.frame(cntNorm, rNames, cluster) # attach cluster info on the data frame
	head(df)

mbk0asis / linear modeling

Last active November 9, 2017 09:02


	# drawing a scatter plot and regression line

	# draw a scatter plot
	library(LSD)
	heatscatter( dta$a, dta$b, cor = TRUE, method = "pearson" )

	# add regression line
	abline( lm( dta$b ~ dta$a ) ) # switch columns (a<-->b)

mbk0asis / Adding a column of row means to the original data frame

Last active November 7, 2017 09:26


	> dta$Mean <- rowMeans(dta)

mbk0asis / One-way ANOVA in R

Last active October 12, 2017 01:51

	# data loading
	dta <- read.csv("file:///C:/Users/bk/Desktop/test2.csv",header = T)
	dta

	# boxplot data
	library(ggplot2)
	ggplot(dta, aes(x=group,y=count)) +
	geom_boxplot()

	# linear regression model for data