Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 04:50 (UTC +09:00)
View GitHub Profile
# Collect the start positions of every "CG" dinucleotide on the mm10 assembly.
#
# biocLite() and the biocLite.R bootstrap script have been retired by
# Bioconductor; BiocManager::install() is the supported installer. The genome
# package is installed only when it is missing, so re-running the script does
# not trigger a fresh download each time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("BSgenome.Mmusculus.UCSC.mm10", quietly = TRUE)) {
  BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")
}
setwd("~/BIO2/MBDseq_Mouse_Muscle_Tcell_JungHo")
library(BSgenome.Mmusculus.UCSC.mm10)
# Keep the first 21 sequences of the genome object
# (presumably chr1-chr19, chrX and chrY -- confirm with names(Mmusculus)).
chrs <- names(Mmusculus)[seq_len(21)]
# One element per selected sequence: the start coordinates of all "CG" matches.
CGs <- lapply(chrs, function(x) start(matchPattern("CG", Mmusculus[[x]])))
# counting RNA-seq reads
# similar results with htseq-count w/ "UNION"
# Runs featureCounts over three BAM files with the repeat annotation
# L1.ERV1.ERVK.ERVL.MaLR.gtf and writes to featureCounts.results.
# NOTE(review): "-t exon -g rmsk_id" presumably counts exon features grouped
# by the rmsk_id attribute -- confirm the GTF actually carries that attribute.
featureCounts -T 35 -p -t exon -g rmsk_id -a L1.ERV1.ERVK.ERVL.MaLR.gtf -o featureCounts.results \
sample1.bam sample2.bam sample3.bam
@mbk0asis
mbk0asis / INDEL profiler
Created May 31, 2018 04:58
To profile the INDEL patterns in amplicon sequencing reads
printf "\nINDEL.profiler\n\n usage: ./INDEL.profiler.sh READs.fasta Amplicon.seq.fasta\n\n"
cat $1 | \
while read l; do
printf " \n"
read L
echo $l $L | sed 's/ /\n/g' | cat - $2 | muscle -quiet | fasta_formatter | \
while read c; do
printf "\n"$c"\nDEL:\n"
read d;
#source("https://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
library("DESeq2")
library("calibrate")
library("pcaExplorer")
library("ggplot2")
library("gplots")
library("ggrepel")
library("gage")
# deepTools - command line
# Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)
# The trailing backslashes join the option lines into one command; without
# them the shell would run only the first line and then try to execute
# "-o", "-a", "-bl" and "--outFileNameMatrix" as separate commands.
$ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed \
    -o SETDB1.low.Matrix.gz --skipZeros -bs 50 \
    -a 1000 -b 1000 --regionBodyLength 5000 \
    -bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR \
    --outFileNameMatrix SETDB1.low.Matrix.tab
####################################################################
# Load the count table from counts.csv. The first column supplies the row
# names and the first line supplies the column names. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
countsTable <- read.csv("counts.csv", row.names = 1, header = TRUE)
# Quick sanity check: preview the first rows and report the table dimensions.
head(countsTable)
dim(countsTable)
####################################################################
# run DESeq2
# Build the scale-regions matrix for the 2m/20m/28m bigWig files over the
# ERV1 BED regions.
computeMatrix scale-regions \
  -S 2m*.bw 20m*.bw 28m*.bw \
  -R repeatMasker_mm10/ERV1.bed \
  -o Muscle.ERV1.matrix.gz \
  --skipZeros -bs 10 \
  -a 1000 -b 1000 --regionBodyLength 2000 \
  -bl blackList_LINE.LTR

# Draw the heatmap (with colorbar) from the ERV1 matrix.
plotHeatmap \
  --whatToShow 'heatmap and colorbar' \
  -m Muscle.ERV1.matrix.gz \
  -out test.Muscle.ERV1.Heatmap.png \
  --samplesLabel 2m-1 2m-2 20m-1 20m-3 28m-1 28m-2 \
  --heatmapHeight 10 --heatmapWidth 1.5 \
  --xAxisLabel "" --regionsLabel "" \
  --startLabel "s" --endLabel "e" \
  --colorList 'white,black'

# Draw the per-group mean profile as a PDF. This reads the LINE bin500
# matrix, which is not produced by the computeMatrix call above.
plotProfile \
  -m Muscle.LINE.bin500.matrix.gz \
  -out Muscle.LINE.bin500.Heatmap.pdf \
  --plotFileFormat pdf --plotHeight 8 \
  --perGroup --averageType mean \
  --colors black red blue \
  --plotType se \
  --startLabel "start" --endLabel "end" --regionsLabel ""
1. Google 'GSE40419' (GEO accession number)
2. Look for 'BioProject' or 'SRA' ID (PRJNA173917 or ERP001058)
3. Go to 'SRA Run Selector'
4. Enter "PRJNA173917" or "ERP001058" in the search.
5. Choose samples to download.
library(DESeq2)
library(pcaExplorer)
library(ggplot2)
library(gplots)
library(ggrepel)
library(reshape2)
# set working directory
setwd('~/00-NGS/SETDB1_TCGA/GSE40419/')