Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 20:34 (UTC +09:00)
View GitHub Profile
# Load the table in L1.6kb.CpG.rel.pos.csv and reshape it so that every input
# row becomes one column of the matrix `dta.t`; `nc` is the number of columns
# left after all-NA columns are removed.
# NOTE(review): the working directory is a hard-coded, user-specific path.
setwd("/home/bk/Desktop/hg38")
# The file is read without a header row. Spell out FALSE: `F` is an ordinary
# variable that can be reassigned.
dta <- read.csv("L1.6kb.CpG.rel.pos.csv", header = FALSE, sep = ",")
dim(dta)
# Drop the first column, then transpose (rows of the file become columns).
dta.t <- t(dta[, -1])
# Keep only columns that contain at least one non-NA value. drop = FALSE keeps
# the result a matrix even when a single column survives, so ncol() below
# never returns NULL.
dta.t <- dta.t[, colSums(is.na(dta.t)) < nrow(dta.t), drop = FALSE]
nc <- ncol(dta.t)
res <- lapply(1:nc,function(i) {
h<-hist(dta.t[,i], plot=F, breaks = 10)
@mbk0asis
mbk0asis / HISAT2-STRINGTIE-BALLGOWN
Created March 19, 2018 00:07
HISAT2-STRINGTIE-BALLGOWN pipeline
# For RNA-seq reads, use "--dta/--downstream-transcriptome-assembly"
# the command is similar to 'bowtie2'
# The alignment summary (stderr) is shown on screen and saved to Sample.stat.
$ hisat2 -x genome -1 read1.fq.gz -2 read2.fq.gz -S Sample.sam -p 8 --dta 2>&1 | tee Sample.stat
# samtools >= 1.3 requires the output file to be given with -o; the legacy
# "out.prefix" positional argument is no longer accepted.
$ samtools view -bS Sample.sam | samtools sort -@ 8 -o Sample.sorted.bam -
# The BAM files can be fed into other pipelines, e.g. DESeq, edgeR, etc.
@mbk0asis
mbk0asis / plotDE.py
Last active March 19, 2018 00:09
extract count data from stringtie output - copied from <http://www.ccb.jhu.edu/software/stringtie/dl/prepDE.py>
#!/usr/bin/env python2
import re, csv, sys, os, glob, warnings, itertools
from math import ceil
from optparse import OptionParser
from operator import itemgetter
# Oldest interpreter version this script supports, as (major, minor).
MIN_PYTHON = (2, 7)
# Abort early with a readable message instead of failing later on newer syntax.
if sys.version_info < MIN_PYTHON:
    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
library(DESeq2)
library(pcaExplorer)
library(ggplot2)
library(gplots)
library(ggrepel)
library(reshape2)
# Set the working directory to the GSE40419 project folder.
# NOTE(review): hard-coded path under the user's home directory; adjust it
# before running on another machine.
setwd('~/00-NGS/SETDB1_TCGA/GSE40419/')
1. Google 'GSE40419' (GEO accession number)
2. Look for 'BioProject' or 'SRA' ID (PRJNA173917 or ERP001058)
3. Go to 'SRA Run Selector'
4. Enter "PRJNA173917" or "ERP001058" in the search.
5. Choose samples to download.
# deepTools: compute a score matrix from the 2m/20m/28m bigWig files over the
# regions in ERV1.bed. Each region is scaled to 2000 bp with 1000 bp flanks on
# both sides, using 10 bp bins; regions listed in the -bl file are excluded.
computeMatrix scale-regions -S 2m*.bw 20m*.bw 28m*.bw -R repeatMasker_mm10/ERV1.bed -o Muscle.ERV1.matrix.gz --skipZeros -bs 10 -a 1000 -b 1000 --regionBodyLength 2000 -bl blackList_LINE.LTR
# Draw only the heatmap and colour bar for that matrix (white-to-black scale),
# with one label per sample and "s"/"e" marking region start and end.
plotHeatmap --whatToShow 'heatmap and colorbar' -m Muscle.ERV1.matrix.gz -out test.Muscle.ERV1.Heatmap.png --samplesLabel 2m-1 2m-2 20m-1 20m-3 28m-1 28m-2 --heatmapHeight 10 --heatmapWidth 1.5 --xAxisLabel "" --regionsLabel "" --startLabel "s" --endLabel "e" --colorList 'white,black'
# Plot per-group mean profiles with standard-error bands as a PDF.
# NOTE(review): this reads Muscle.LINE.bin500.matrix.gz, which is not produced
# by the computeMatrix call above - presumably generated by a separate run.
plotProfile -m Muscle.LINE.bin500.matrix.gz -out Muscle.LINE.bin500.Heatmap.pdf --plotFileFormat pdf --plotHeight 8 --perGroup --averageType mean --colors black red blue --plotType se --startLabel "start" --endLabel "end" --regionsLabel ""
####################################################################
# Load the count data: the first column of counts.csv supplies the row names
# and the first line supplies the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
countsTable <- read.csv("counts.csv", row.names = 1, header = TRUE)
# Quick sanity check of the first rows and the table dimensions.
head(countsTable)
dim(countsTable)
####################################################################
# run DESeq2
# deepTools - command line
# Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)
# The trailing backslashes join the option lines into one command; without
# them the shell would run only the first line as computeMatrix.
$ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed \
    -o SETDB1.low.Matrix.gz --skipZeros -bs 50 \
    -a 1000 -b 1000 --regionBodyLength 5000 \
    -bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR \
    --outFileNameMatrix SETDB1.low.Matrix.tab
#source("https://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
library("DESeq2")
library("calibrate")
library("pcaExplorer")
library("ggplot2")
library("gplots")
library("ggrepel")
library("gage")