This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
import re, csv, sys, os, glob, warnings, itertools | |
from math import ceil | |
from optparse import OptionParser | |
from operator import itemgetter | |
MIN_PYTHON = (2, 7) | |
if sys.version_info < MIN_PYTHON: | |
sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For RNA-seq reads, use "--dta/--downstream-transcriptome-assembly" | |
# the command is similar to 'bowtie2' | |
$ hisat2 -x genome -1 read1.fq.gz -2 read2.fq.gz -S Sample.sam -p 8 --dta 2>&1 | tee $l.stat | |
$ samtools view -bS Sample.sam | samtools sort -@ 8 - Sample.sorted | |
# The bam files can be fed into other pipelines e.g DESeq, edgeR, and etc |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setwd("/home/bk/Desktop/hg38") | |
dta <- read.csv("L1.6kb.CpG.rel.pos.csv",header = F, sep = ",") | |
dim(dta) | |
dta.t <- t(dta[,-1]) | |
dta.t <- dta.t[,colSums(is.na(dta.t))<nrow(dta.t)] | |
nc <- ncol(dta.t) | |
res <- lapply(1:nc,function(i) { | |
h<-hist(dta.t[,i], plot=F, breaks = 10) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To caluculate CpG density and distribution in repeat elements. (ERVs, LINE, and etc.) | |
## extract information of repeats from repeatMasker database | |
$ zcat hg38.repeat.masker.txt.gz | head | |
#bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEndrepLeft id | |
0 1892 83 59 14 chr1 67108753 67109046 -181847376 + L1P5 LINE L1 5301 5607 -544 1 | |
1 2582 27 0 23 chr1 8388315 8388618 -240567804 - AluY SINE Alu -15 296 1 1 | |
1 4085 171 77 36 chr1 25165803 25166380 -223790042 + L1MB5 LINE L1 5567 6174 0 4 | |
1 2285 91 0 13 chr1 33554185 33554483 -215401939 - AluSc SINE Alu -6 303 10 6 | |
1 2451 64 3 26 chr1 41942894 41943205 -207013217 - AluY SINE Alu -7 304 1 8 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(ggpubr) | |
setwd("/home/bio0/00-NGS/SETDB1_TCGA") | |
dta <- read.csv("exp.LUNG.FPKM.EpiStem.ggplot.2.csv", header = F) | |
colnames(dta) <- c("Gene","Symbol","Sample","FPKM","Group") | |
names(dta) | |
dta2 <- dta[grep("DNMT1", dta$Symbol), ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Computing matrix | |
computeMatrix scale-regions -S Muscle.2.bin500.bw Muscle.20.bin500.bw Muscle.28.bin500.bw \ | |
--skipZeros -R mm10/$1.bed -o Muscle.$1.bin500.matrix --outFileNameMatrix Muscle.$1.bin500.matrix.tsv \ | |
-bs 10 -a 5000 -b 5000 --regionBodyLength 10000 \ | |
-–blackListFileName blackList.bed # you may mask specific regions with "black list" | |
# Plotting heatmap | |
plotHeatmap -m Muscle.$1.bin500.matrix -out Muscle.$1.bin500.Heatmap.png --colorList 'white,black' # --zMax 10 | |
# Plotting profile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dta <- read.csv("file:///C:/Users/bk/Desktop/TEST_data.csv",header = T, row.names = 1) | |
head(dta) | |
cl <- kmeans(cntNorm,6) | |
cluster<-cl$cluster | |
table(cluster) | |
rNames <- rownames(cnrNorm) | |
df<-data.frame(cntNorm, rNames, cluster) # attach cluster info on the data frame | |
head(df) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# drawing a scatter plot and regression line | |
# draw a scatter plot | |
library(LSD) | |
heatscatter( dta$a, dta$b, cor = TRUE, method = "pearson" ) | |
# add regression line | |
abline( lm( dta$b ~ dta$a ) ) # switch columns (a<-->b) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> dta$Mean <- rowMeans(dta) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# data loading | |
dta <- read.csv("file:///C:/Users/bk/Desktop/test2.csv",header = T) | |
dta | |
# boxplot data | |
library(ggplot2) | |
ggplot(dta, aes(x=group,y=count)) + | |
geom_boxplot() | |
# linear regression model for data |