Byungkuk Min mbk0asis

mbk0asis / CpG density calculator - R code

Last active March 4, 2018 08:50

	# Work from the directory that holds the input table.
	setwd("/home/bk/Desktop/hg38")

	# Read the comma-separated table; the file has no header row.
	dta <- read.csv("L1.6kb.CpG.rel.pos.csv", header = FALSE, sep = ",")
	dim(dta)
	# Drop the first column and transpose, so each input row becomes a column.
	dta.t <- t(dta[, -1])
	# Keep only the columns that are not entirely NA. drop = FALSE keeps the
	# result a matrix even when a single row or column remains, so ncol() below
	# always returns a number instead of NULL.
	dta.t <- dta.t[, colSums(is.na(dta.t)) < nrow(dta.t), drop = FALSE]
	nc <- ncol(dta.t)

	res <- lapply(1:nc,function(i) {
	h<-hist(dta.t[,i], plot=F, breaks = 10)

mbk0asis / HISAT2-STRINGTIE-BALLGOWN

Created March 19, 2018 00:07

HISAT2-STRINGTIE-BALLGOWN pipeline


	# For RNA-seq reads, use "--dta/--downstream-transcriptome-assembly"
	# the command is similar to 'bowtie2'
	# NOTE(review): $l is not set anywhere in this snippet — presumably a
	# sample-name variable from an enclosing loop; confirm before running.

	$ hisat2 -x genome -1 read1.fq.gz -2 read2.fq.gz -S Sample.sam -p 8 --dta 2>&1 | tee $l.stat

	$ samtools view -bS Sample.sam | samtools sort -@ 8 - Sample.sorted


	# The bam files can be fed into other pipelines, e.g., DESeq, edgeR, etc.

mbk0asis / plotDE.py

Last active March 19, 2018 00:09

extract count data from stringtie output - copied from <http://www.ccb.jhu.edu/software/stringtie/dl/prepDE.py>

	#!/usr/bin/env python2
	import re, csv, sys, os, glob, warnings, itertools
	from math import ceil
	from optparse import OptionParser
	from operator import itemgetter

	# Refuse to run on interpreters older than Python 2.7.
	MIN_PYTHON = (2, 7)
	if sys.version_info < MIN_PYTHON:
	    # sys.exit with a string prints the message and exits with a failure status.
	    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)

mbk0asis / SETDB1_Retro_Expression

Last active April 26, 2018 02:30

mbk0asis / how to use fastq-dump

Last active September 3, 2018 00:43

	1. Google 'GSE40419' (GEO accession number)

	2. Look for 'BioProject' or 'SRA' ID (PRJNA173917 or ERP001058)

	3. Go to 'SRA Run Selector'

	4. Enter "PRJNA173917" or "ERP001058" in the search.

	5. Choose samples to download.

mbk0asis / deeptools_2

Created April 6, 2018 05:21


	# Build a scale-regions signal matrix from the listed bigWig files (-S) over
	# the ERV1 intervals (-R): 10-bp bins (-bs), 1000-bp flanks on each side
	# (-a/-b), region bodies scaled to 2000 bp. Regions in the blacklist file (-bl)
	# are excluded and all-zero regions are skipped (--skipZeros).
	computeMatrix scale-regions -S 2m.bw 20m.bw 28m*.bw -R repeatMasker_mm10/ERV1.bed -o Muscle.ERV1.matrix.gz --skipZeros -bs 10 -a 1000 -b 1000 --regionBodyLength 2000 -bl blackList_LINE.LTR

	# Render that matrix as a PNG showing only the heatmap and colour bar, on a
	# white-to-black colour scale, with custom sample labels and "s"/"e" as the
	# region start/end labels.
	# NOTE(review): six sample labels are given; this matches the inputs only if
	# the 28m*.bw glob and the other files add up to six bigWigs — confirm.
	plotHeatmap --whatToShow 'heatmap and colorbar' -m Muscle.ERV1.matrix.gz -out test.Muscle.ERV1.Heatmap.png --samplesLabel 2m-1 2m-2 20m-1 20m-3 28m-1 28m-2 --heatmapHeight 10 --heatmapWidth 1.5 --xAxisLabel "" --regionsLabel "" --startLabel "s" --endLabel "e" --colorList 'white,black'

	# Plot mean profiles with standard-error shading (--plotType se), grouped per
	# region group (--perGroup), written as a PDF.
	# NOTE(review): this reads Muscle.LINE.bin500.matrix.gz, which the
	# computeMatrix call above does not produce — presumably generated by a
	# separate run; confirm.
	plotProfile -m Muscle.LINE.bin500.matrix.gz -out Muscle.LINE.bin500.Heatmap.pdf --plotFileFormat pdf --plotHeight 8 --perGroup --averageType mean --colors black red blue --plotType se --startLabel "start" --endLabel "end" --regionsLabel ""

mbk0asis / pcaplot function in DESeq2

Last active May 18, 2022 17:02

	####################################################################
	# Load the count data
	# (the first line of the CSV is the header; the first column supplies the
	# row names)

	countsTable <- read.csv("counts.csv", row.names = 1, header = TRUE)
	head(countsTable)
	dim(countsTable)

	####################################################################
	# run DESeq2

mbk0asis / mean_expression_deepTools

Last active April 27, 2018 05:57

	# deepTools - command line
	# Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)
	# The command spans several lines; each trailing backslash continues it on
	# the next line so the shell treats it as a single invocation.

	$ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed \
	-o SETDB1.low.Matrix.gz --skipZeros -bs 50 \
	-a 1000 -b 1000 --regionBodyLength 5000 \
	-bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR \
	--outFileNameMatrix SETDB1.low.Matrix.tab

mbk0asis / TCGAbiolinks

Created May 15, 2018 01:40

	#source("https://bioconductor.org/biocLite.R")
	#biocLite("TCGAbiolinks")

	library(TCGAbiolinks)
	library(data.table)
	library(dplyr)
	library(DT)

	############################################################
	# Clinical data

mbk0asis / DESeq2-short

Last active May 17, 2018 07:13

	#source("https://bioconductor.org/biocLite.R")
	#biocLite("org.Mm.eg.db")

	library("DESeq2")
	library("calibrate")
	library("pcaExplorer")
	library("ggplot2")
	library("gplots")
	library("ggrepel")
	library("gage")