This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Locate every "CG" dinucleotide on the mouse (mm10) reference sequences.
#
# One-time setup. The legacy `source(".../biocLite.R")` + `biocLite()`
# installer has been retired by Bioconductor; BiocManager replaces it.
# The install only runs when the genome package is not available yet.
if (!requireNamespace("BSgenome.Mmusculus.UCSC.mm10", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")
}
setwd("~/BIO2/MBDseq_Mouse_Muscle_Tcell_JungHo")
library(BSgenome.Mmusculus.UCSC.mm10)
# Use the first 21 sequence names of the genome object
# (presumably chr1-chr19, chrX and chrY -- confirm with names(Mmusculus)).
chrs <- names(Mmusculus)[1:21]
# One list element per chromosome: start coordinates of every "CG" match.
CGs <- lapply(chrs, function(x) start(matchPattern("CG", Mmusculus[[x]])))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# counting RNA-seq reads
# similar results with htseq-count w/ "UNION"
#   -T 35        number of threads
#   -p           paired-end input (exact counting semantics depend on the
#                Subread version; newer releases also need --countReadPairs)
#   -t exon      GTF feature type to count
#   -g rmsk_id   GTF attribute used to group features into meta-features
#   -a / -o      annotation GTF / output table
featureCounts -T 35 -p -t exon -g rmsk_id -a L1.ERV1.ERVK.ERVL.MaLR.gtf -o featureCounts.results \
  sample1.bam sample2.bam sample3.bam
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
printf "\nINDEL.profiler\n\n usage: ./INDEL.profiler.sh READs.fasta Amplicon.seq.fasta\n\n" | |
cat $1 | \ | |
while read l; do | |
printf " \n" | |
read L | |
echo $l $L | sed 's/ /\n/g' | cat - $2 | muscle -quiet | fasta_formatter | \ | |
while read c; do | |
printf "\n"$c"\nDEL:\n" | |
read d; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the analysis and plotting packages used by this script.
# One-time install of the annotation package
# (biocLite has been replaced by BiocManager):
# BiocManager::install("org.Mm.eg.db")
library("DESeq2")
library("calibrate")
library("pcaExplorer")
library("ggplot2")
library("gplots")
library("ggrepel")
library("gage")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the packages used by this script.
# One-time install (biocLite has been replaced by BiocManager):
# BiocManager::install("TCGAbiolinks")
library(TCGAbiolinks)
library(data.table)
library(dplyr)
library(DT)
############################################################
# Clinical data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# deepTools - command line
# Use the "--outFileNameMatrix" option to export the data set
# (data are already oriented by strand).
# Each option line ends with a backslash so the whole call is one command;
# the shell prompt marker is omitted so the snippet can be pasted directly.
computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed \
  -o SETDB1.low.Matrix.gz --skipZeros -bs 50 \
  -a 1000 -b 1000 --regionBodyLength 5000 \
  -bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR \
  --outFileNameMatrix SETDB1.low.Matrix.tab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################################
# Load the count data
# The first column of counts.csv supplies the row names and the first
# line is the header. TRUE is spelled out because `T` can be reassigned.
countsTable <- read.csv("counts.csv", row.names = 1, header = TRUE)
# Quick sanity check: first rows and dimensions of the table.
head(countsTable)
dim(countsTable)
####################################################################
# run DESeq2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a scaled-region signal matrix over the ERV1 annotations from the
# 2m, 20m and 28m bigWig tracks; regions in the -bl file are excluded.
computeMatrix scale-regions \
  -S 2m*.bw 20m*.bw 28m*.bw \
  -R repeatMasker_mm10/ERV1.bed \
  -o Muscle.ERV1.matrix.gz \
  --skipZeros -bs 10 -a 1000 -b 1000 --regionBodyLength 2000 \
  -bl blackList_LINE.LTR

# Render the ERV1 matrix as a heatmap with a colorbar (white-to-black scale).
plotHeatmap --whatToShow 'heatmap and colorbar' \
  -m Muscle.ERV1.matrix.gz \
  -out test.Muscle.ERV1.Heatmap.png \
  --samplesLabel 2m-1 2m-2 20m-1 20m-3 28m-1 28m-2 \
  --heatmapHeight 10 --heatmapWidth 1.5 \
  --xAxisLabel "" --regionsLabel "" --startLabel "s" --endLabel "e" \
  --colorList 'white,black'

# Draw a per-group mean profile with standard-error bands.
# NOTE(review): this reads Muscle.LINE.bin500.matrix.gz, which is not produced
# by the computeMatrix call above, and the output is named "*.Heatmap.pdf"
# although it is a profile plot -- confirm both file names.
plotProfile -m Muscle.LINE.bin500.matrix.gz \
  -out Muscle.LINE.bin500.Heatmap.pdf --plotFileFormat pdf \
  --plotHeight 8 --perGroup --averageType mean \
  --colors black red blue --plotType se \
  --startLabel "start" --endLabel "end" --regionsLabel ""
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Google 'GSE40419' (the GEO accession number).
2. Look for the 'BioProject' or 'SRA' ID (PRJNA173917 or ERP001058).
3. Go to the 'SRA Run Selector'.
4. Enter "PRJNA173917" or "ERP001058" in the search box.
5. Choose the samples to download.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the analysis, plotting and reshaping packages used by this script.
library(DESeq2)
library(pcaExplorer)
library(ggplot2)
library(gplots)
library(ggrepel)
library(reshape2)
# set working directory
# NOTE(review): a hard-coded setwd() ties the script to one machine's layout;
# kept because later relative paths may depend on it.
setwd('~/00-NGS/SETDB1_TCGA/GSE40419/')