Last active
March 18, 2021 17:14
-
-
Save k3yavi/cf71cbe753d57cb6acf1c1d34c013045 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages({ | |
library(Signac) | |
library(Seurat) | |
library(Matrix) | |
library(ggplot2) | |
library(patchwork) | |
library(dplyr) | |
library(RcppRoll) | |
library(fastmatch) | |
library(EnsDb.Hsapiens.v86) | |
library(reshape2) | |
sourceCpp("~/MyHmm.cpp") | |
}) | |
# Usage | |
# data <- LoadData() | |
# idents <- factor(c("B", "CD4 T", "Mono")) | |
# HorizontalCoveragePlot(data = data, | |
# region = "PAX5", | |
# gname = "PAX5", | |
# extend.upstream = 5000, | |
# extend.downstream = 5000, | |
# state.plot = "/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/", | |
# idents = idents, | |
# marks = c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2") | |
# ) | |
{ | |
GetALLPosterior <- function(data, roi, num.threads=10) { | |
qregions <- GetTileRegions(roi) | |
print("extracting 200bp regions") | |
data <- GetRegionMatrix(data, qregions) | |
print("imputing") | |
imp.matrices <- ImputeCounts(data) | |
print("extracting tiles") | |
qregions <- GetTileRegions(roi, combined = F) | |
initialize <- data$hmm$init # rep(1.153589e-153, 12) | |
emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
transition <- as.matrix(data$hmm$transition) | |
print("calculating posterior") | |
GetThreadPosterior <- function(x){ | |
range <- length(qregions) %/% num.threads | |
start <- x*range + 1 | |
end <- (x+1)*range | |
probs <- lapply(seq(start, end), function(qid){ | |
string.query <- GRangesToString(qregions[[qid]]) | |
matrices <- lapply(names(data[1:5]), function(mark){ | |
as.matrix(imp.matrices[[mark]]@assays$imputed@data[string.query, Cells(data$az.obj)]) | |
}) | |
posterior.probs <- posterior_fast(initialize, emission, transition, matrices) | |
posterior.probs <- lapply(posterior.probs, function(prob) { | |
rownames(prob) <- string.query | |
prob | |
}) | |
names(posterior.probs) <- Cells(data$az.obj) | |
posterior.probs | |
}) | |
probs | |
} | |
all.post <- bplapply(X = seq(0, num.threads-1), FUN = GetThreadPosterior) | |
all.post <- do.call(c, all.post) | |
all.post | |
} | |
GetPosterior <- function(data, qregion, imp.matrices, full=F) { | |
initialize <- data$hmm$init #rep(1.153589e-153, 12) | |
emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
transition <- as.matrix(data$hmm$transition) | |
string.query <- GRangesToString(qregion) | |
posterior.probs <- pblapply(Cells(data$az.obj), function(qcell.id){ | |
observations <- lapply(names(data[1:5]), function(mark){ | |
df <- data.frame(imp.matrices[[mark]]@assays$imputed@data[string.query, qcell.id]) | |
df[is.na(df[1])] <- 0 | |
df | |
}) | |
observations <- as.matrix(do.call(cbind.data.frame, observations)) | |
posterior.prob <- posterior(initialize, emission, transition, observations) | |
if (full) { | |
posterior.prob | |
} else { | |
unlist(apply(posterior.prob, 1, which.max)) | |
} | |
}) | |
names(posterior.probs) <- Cells(data$az.obj) | |
length(posterior.probs) | |
length(posterior.probs[[1]]) | |
posterior.probs | |
} | |
GetPosteriorFast <- function(data, qregions, imp.matrices) { | |
initialize <- data$hmm$init #rep(1.153589e-153, 12) | |
emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
transition <- as.matrix(data$hmm$transition) | |
if (class(qregions) == "GRanges") { | |
matrices <- lapply(names(data[1:5]), function(mark){ | |
as.matrix(imp.matrices[[mark]][GRangesToString(qregions), Cells(data$az.obj)]) | |
}) | |
all.probs <- posterior_fast(initialize, emission, transition, matrices) | |
names(all.probs) <- Cells(data$az.obj) | |
} else { | |
all.probs <- pblapply(qregions, function(qregion) { | |
string.query <- GRangesToString(qregion) | |
matrices <- lapply(names(data[1:5]), function(mark){ | |
as.matrix(imp.matrices[[mark]][string.query, Cells(data$az.obj)]) | |
}) | |
posterior.probs <- posterior_fast(initialize, emission, transition, matrices) | |
names(posterior.probs) <- Cells(data$az.obj) | |
posterior.probs | |
}) | |
} | |
all.probs | |
} | |
PlotStates <- function(roi.region.id, posterior.probs, az.obj, split=F, full.posterior=F) { | |
if (full.posterior) { | |
vals <- lapply(posterior.probs, function(probs) { | |
if (length(probs) >= roi.region.id) { | |
probs[roi.region.id, ] | |
} else { | |
rep(0.0, 12) | |
} | |
}) | |
states <- do.call(rbind.data.frame, vals) | |
rownames(states) <- names(vals) | |
dim(states) | |
colnames(states) <- seq(12) | |
states$"1" <- states$"1" + states$"2" | |
states$"3" <- states$"3" + states$"4" + states$"5" | |
states$"9" <- states$"9" + states$"10" + states$"11" + states$"12" | |
states$"6" <- states$"6" + states$"7" + states$"8" | |
states$"2" <- NULL | |
states$"4" <- NULL | |
states$"5" <- NULL | |
states$"10" <- NULL | |
states$"11" <- NULL | |
states$"12" <- NULL | |
states$"7" <- NULL | |
states$"8" <- NULL | |
colnames(states) <- c("Promoter", "Enhancer", "NA", "Repression") | |
states <- states / rowSums(states) | |
az.obj <- data$az.obj | |
az.obj[["states"]] <- CreateAssayObject(t(states)) | |
FeaturePlot(az.obj, reduction = "jumap", features = c("Promoter", "Enhancer", "Repression"), ncol = 3) & | |
scale_colour_gradientn(colours = rev(brewer.pal(n = 5, name = "RdBu"))) | |
} else { | |
vals <- lapply(posterior.probs, function(probs) { | |
if (length(probs) >= roi.region.id) { | |
probs[[roi.region.id]] | |
} else { | |
0.0 | |
} | |
}) | |
states <- unlist(vals) | |
names(states) <- names(vals) | |
print(table(states)) | |
az.obj$states <- states | |
az.obj$states[az.obj$states %in% c(1, 2)] <- "Promoter" | |
az.obj$states[az.obj$states %in% c(3, 4, 5)] <- "Enhancer" | |
az.obj$states[az.obj$states %in% c(9, 10, 11, 12)] <- "Repression" | |
az.obj$states[az.obj$states %in% c(0,6,7,8)] <- NA | |
az.obj$states <- factor(az.obj$states, levels = c("Promoter", "Enhancer", "Repression", "NA")) | |
if (split) { | |
DimPlot(az.obj, reduction = "jumap", group.by = "states", split.by = "states", ncol = 3) | |
} else { | |
DimPlot(az.obj, reduction = "jumap", group.by = "states", label = T, repel = T) | |
} | |
} | |
} | |
GetPlots <- function(roi.region.id, data, imp.matrices) { | |
plots <- lapply(names(data[1:5]), function(mark) { | |
obj <- data[[mark]] | |
DefaultAssay(obj) <- "region" | |
keep.cells <- obj@assays$region@data[rownames(obj@assays$region)[roi.region.id], ] | |
keep.cells <- names(keep.cells[keep.cells > 0]) | |
DimPlot(obj, cells.highlight = keep.cells, reduction = "ref.umap") + NoLegend() + ggtitle(mark) | |
#FeaturePlot(obj, reduction = "ref.umap", features = rownames(obj@assays$region)[roi.region.id]) + NoLegend() + ggtitle(mark) | |
}) | |
i.plots <- lapply(names(data[1:5]), function(mark) { | |
obj <- imp.matrices[[mark]] | |
keep.cells <- obj@assays$imputed@data[roi.region.id, ] | |
keep.cells <- names(keep.cells[keep.cells > 0]) | |
# DimPlot(obj, cells.highlight = keep.cells, reduction = "jumap", group.by = "celltype.l2") + NoLegend() + ggtitle(mark) | |
FeaturePlot(obj, reduction = "jumap", features = rownames(obj@assays$imputed@data)[roi.region.id]) + ggtitle(paste0("imputed_", mark)) | |
}) | |
wrap_plots(wrap_plots(plots, ncol = 2), wrap_plots(i.plots, ncol = 2), ncol = 2) | |
} | |
ImputeCounts <- function(data, region.data) { | |
imp.mat <- lapply(names(data[1:5]), function(mark){ | |
anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T) | |
mat <- region.data[[mark]][, rownames(anchors)] | |
imp.mat <- t(mat %*% anchors) | |
norm.mat <- colSums((anchors > 0)*1) | |
imp.mat <- imp.mat / norm.mat | |
imp.mat <- t(imp.mat) | |
missing.cells <- setdiff(Cells(data$az.obj), colnames(imp.mat)) | |
if(length(missing.cells) > 0) { | |
zeros <- matrix(0, nrow = length(rownames(imp.mat)), ncol = length(missing.cells)) | |
colnames(zeros) <- missing.cells | |
rownames(zeros) <- rownames(imp.mat) | |
imp.mat <- cbind(imp.mat, zeros) | |
} | |
imp.mat[, Cells(data$az.obj)] | |
#obj <- data$az.obj | |
#obj[["imputed"]] <- CreateAssayObject(imp.mat) | |
#obj | |
}) | |
names(imp.mat) <- names(data[1:5]) | |
imp.mat | |
} | |
ImputeVals <- function(data, feats) { | |
imp.mat <- lapply(names(data[1:5]), function(mark){ | |
anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T) | |
mat <- data[[mark]]@assays$region@data[feats, rownames(anchors)] | |
imp.mat <- t(mat %*% anchors) | |
norm.mat <- colSums((anchors > 0)*1) | |
#norm.mat <- ((mat > 0)*1) %*% ((anchors > 0)*1) | |
imp.mat <- imp.mat / norm.mat | |
t(imp.mat) | |
}) | |
names(imp.mat) <- names(data[1:5]) | |
imp.mat | |
} | |
GetRegionMatrix <- function(data, qregion) { | |
region.mat <- lapply(names(data[1:5]), function(mark){ | |
obj <- data[[mark]] | |
cells <- rownames(Matrix(as.matrix(data$anchors[[mark]]), sparse = T)) | |
mat <- SingleFeatureMatrix( | |
obj@assays$tiles@fragments[[1]], | |
qregion, | |
cells = cells, | |
verbose = F | |
) | |
mat <- mat[, cells] | |
mat | |
}) | |
names(region.mat) <- names(data[1:5]) | |
region.mat | |
} | |
GetRegionMatrixOld <- function(data, qregion) { | |
region.mat <- lapply(data[1:5], function(obj){ | |
mat <- SingleFeatureMatrix( | |
obj@assays$tiles@fragments[[1]], | |
qregion, | |
cells = Cells(obj), | |
verbose = F | |
) | |
mat | |
}) | |
names(region.mat) <- names(data[1:5]) | |
for (mark in names(data[1:5])) { | |
data[[mark]][["region"]] <- CreateAssayObject(region.mat[[mark]]) | |
} | |
data | |
} | |
GetTileRegion <- function(gname, window=200, extend.upstream = 5000, extend.downstream = 5000) { | |
region <- Signac:::FindRegion(data[["k4me1"]], region = gname, | |
extend.upstream=extend.upstream, | |
extend.downstream=extend.downstream) | |
start <- region@ranges@start | |
end <- region@ranges@start + region@ranges@width | |
start <- start %/% window | |
end <- end %/% window | |
qregion <- StringToGRanges( paste0(as.character(region@seqnames@values), "-", | |
seq(start*window, end*window, window), "-", | |
seq(start*window + window, end*window + window, window) | |
) | |
) | |
qregion | |
} | |
GetTileRegions <- function(regions, window=200, extend.upstream = 5000, extend.downstream = 5000, combined=T) { | |
if (combined) { | |
ranges.string <- unlist(lapply(regions, function(region) { | |
region <- Signac:::FindRegion(region = region, extend.upstream=extend.upstream, | |
extend.downstream=extend.downstream) | |
start <- region@ranges@start | |
end <- region@ranges@start + region@ranges@width | |
start <- start %/% window | |
end <- end %/% window | |
qregion <- paste0(as.character(region@seqnames@values), "-", seq(start*window, end*window, window), | |
"-", seq(start*window + window, end*window + window, window)) | |
qregion | |
})) | |
StringToGRanges(ranges.string) | |
} else{ | |
lapply(regions, function(region) { | |
GetTileRegion(region) | |
}) | |
} | |
} | |
GetModelParams <- function(file.path) { | |
init.model <- read.table(file.path, skip = 1, nrows = 12) | |
trans.model <- read.table(file.path, skip = 13, nrows = 144) | |
trans.model <- trans.model[, c("V2", "V3", "V4")] | |
trans.model <- dcast(trans.model, V2 ~ V3, value.var = 'V4') | |
trans.model$V2 <- NULL | |
emi.model <- read.table(file.path, skip = 157) | |
emi.model <- emi.model[emi.model$V5 == 1, c("V2", "V4", "V6")] | |
emi.model <- dcast(emi.model, V2 ~ V4, value.var = 'V6') | |
emi.model$V2 <- NULL | |
list( | |
"init" = init.model$V3, | |
"transition" = trans.model, | |
"emission" = emi.model | |
) | |
} | |
LoadData <- function() { | |
k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds") | |
k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds") | |
k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds") | |
k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds") | |
k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds") | |
k4me1.data$disc.ptime <- round(k4me1.data$pseudo.time, 1) | |
k4me2.data$disc.ptime <- round(k4me2.data$pseudo.time, 1) | |
k4me3.data$disc.ptime <- round(k4me3.data$pseudo.time, 1) | |
k27ac.data$disc.ptime <- round(k27ac.data$pseudo.time, 1) | |
k27me3.data$disc.ptime <- round(k27me3.data$pseudo.time, 1) | |
DefaultAssay(k4me1.data) <- "tiles" | |
DefaultAssay(k4me2.data) <- "tiles" | |
DefaultAssay(k4me3.data) <- "tiles" | |
DefaultAssay(k27ac.data) <- "tiles" | |
DefaultAssay(k27me3.data) <- "tiles" | |
hmm <- GetModelParams("/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/model_12.txt") | |
hmm$init <- hmm$init + 1.153589e-153 | |
roi <- readRDS("~/test_rds/roi.rds") | |
roi <- GRangesToString(roi) | |
cell.types <- names(table(k4me1.data$predicted.celltype.l2)) | |
cell.types <- setdiff(cell.types, c("CD4 Proliferating", "Eryth", "ILC", "Doublet", "NK_CD56bright")) | |
cell.types | |
data <- list( | |
k4me1 = k4me1.data, | |
k4me2 = k4me2.data, | |
k4me3 = k4me3.data, | |
k27ac = k27ac.data, | |
k27me3 = k27me3.data, | |
hmm = hmm, | |
roi = roi, | |
cell.types = cell.types, | |
az.obj = readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/anchors/small.azimuth.rds") | |
) | |
marks <- c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3") | |
anchors <- lapply(marks, function(mark){ | |
anchor <- readRDS(paste0("~/v10_bingjie_cut_n_tag/seurat_objects/fwd_anchors/anchors.", mark, ".rds")) | |
df <- data.frame(anchor@anchors) | |
df$cell2 <- Cells(data[[mark]])[df$cell2] | |
df$cell1 <- Cells(data$az.obj)[df$cell1] | |
colnames(df) <- c("azimuth", "mark", "score") | |
#df <- df[data[[mark]]$predicted.celltype.l1[df$mark] == data$az.obj$celltype.l1[df$azimuth], ] | |
df <- df[df$score > 0, ] | |
score.mat <- dcast(df, mark ~ azimuth, value.var = 'score') | |
rownames(score.mat) <- score.mat$mark | |
score.mat$mark <- NULL | |
score.mat[is.na(score.mat)] <- 0 | |
score.mat | |
}) | |
names(anchors) <- marks | |
data[["anchors"]] <- anchors | |
data | |
} | |
LoadFullData <- function() { | |
k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds") | |
k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds") | |
k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds") | |
k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds") | |
k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds") | |
k9me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k9me3.rds") | |
poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_total.rds") | |
active.poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_ser2.rds") | |
asap.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/asap.rds") | |
rna.data <- readRDS("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/citeseq.rds") | |
DefaultAssay(k4me1.data) <- "tiles" | |
DefaultAssay(k4me2.data) <- "tiles" | |
DefaultAssay(k4me3.data) <- "tiles" | |
DefaultAssay(k27ac.data) <- "tiles" | |
DefaultAssay(k27me3.data) <- "tiles" | |
DefaultAssay(k9me3.data) <- "tiles" | |
DefaultAssay(poll2.data) <- "tiles" | |
DefaultAssay(active.poll2.data) <- "tiles" | |
DefaultAssay(asap.data) <- "ASAP" | |
list( | |
k4me1 = k4me1.data, | |
k4me2 = k4me2.data, | |
k4me3 = k4me3.data, | |
k27ac = k27ac.data, | |
k27me3 = k27me3.data, | |
k9me3 = k9me3.data, | |
pol2 = poll2.data, | |
apol2 = active.poll2.data, | |
asap = asap.data, | |
rna = rna.data | |
) | |
} | |
GetMaxIndex <- function(data, mark, cell.type, group.by, region.data) { | |
#print(table(data[[mark]]@meta.data[group.by])) | |
keep.cells <- rownames(data[[mark]]@meta.data[group.by][data[[mark]]@meta.data[group.by] == cell.type, , drop=F]) | |
which.max(rowSums(region.data[[mark]][, keep.cells])) | |
} | |
SingleFeatureMatrix <- function( | |
fragment, | |
features, | |
cells = NULL, | |
process_n = 2000, | |
sep = c("-", "-"), | |
verbose = TRUE | |
) { | |
fragment.path <- GetFragmentData(object = fragment, slot = "path") | |
if (!is.null(cells)) { | |
# only look for cells that are in the fragment file | |
frag.cells <- GetFragmentData(object = fragment, slot = "cells") | |
# first subset frag.cells | |
cell.idx <- fmatch( | |
x = names(x = frag.cells), | |
table = cells, | |
nomatch = 0L | |
) > 0 | |
cells <- frag.cells[cell.idx] | |
} | |
tbx <- TabixFile(file = fragment.path) | |
features <- keepSeqlevels( | |
x = features, | |
value = intersect( | |
x = seqnames(x = features), | |
y = seqnamesTabix(file = tbx) | |
), | |
pruning.mode = "coarse" | |
) | |
if (length(x = features) == 0) { | |
stop("No matching chromosomes found in fragment file.") | |
} | |
feature.list <- Signac:::ChunkGRanges( | |
granges = features, | |
nchunk = ceiling(x = length(x = features) / process_n) | |
) | |
if (verbose) { | |
message("Extracting reads overlapping genomic regions") | |
} | |
#if (nbrOfWorkers() > 1) { | |
mylapply <- bplapply | |
#} else { | |
# mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) | |
#} | |
matrix.parts <- mylapply( | |
X = feature.list, | |
FUN = Signac:::PartialMatrix, | |
tabix = tbx, | |
cells = cells, | |
sep = sep | |
) | |
# remove any that are NULL (no fragments for any cells in the region) | |
null.parts <- sapply(X = matrix.parts, FUN = is.null) | |
matrix.parts <- matrix.parts[!null.parts] | |
if (is.null(x = cells)) { | |
all.cells <- unique( | |
x = unlist(x = lapply(X = matrix.parts, FUN = colnames)) | |
) | |
matrix.parts <- lapply( | |
X = matrix.parts, | |
FUN = AddMissingCells, | |
cells = all.cells | |
) | |
} | |
featmat <- Reduce(f = rbind, x = matrix.parts) | |
if (!is.null(x = cells)) { | |
# cells supplied, rename with cell name from object rather than file | |
cell.convert <- names(x = cells) | |
names(x = cell.convert) <- cells | |
colnames(x = featmat) <- unname(obj = cell.convert[colnames(x = featmat)]) | |
} | |
# reorder features | |
feat.str <- GRangesToString(grange = features) | |
featmat <- featmat[feat.str, ] | |
return(featmat) | |
} | |
PlotSCCoverage <- function(data, data_full, select.roi, gname) { | |
qregion <- GetTileRegion(select.roi, window=200, | |
extend.upstream = 5000, | |
extend.downstream = 5000) | |
region.data <- GetRegionMatrix(data, qregion) | |
imp.matrices <- ImputeCounts(data, region.data) | |
posterior.probs <- GetPosteriorFast(data, qregion, imp.matrices) | |
celltypes <- c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC") | |
sc.plot <- lapply(celltypes, function(ct) { | |
keep.cells <- names(data$az.obj$celltype.l1[data$az.obj$celltype.l1 == ct]) | |
nmat <- Reduce("+", posterior.probs[keep.cells]) / length(keep.cells) | |
nmat <- nmat / rowSums(nmat) | |
colnames(nmat) <- paste0("E", seq(12)) | |
data.frame(nmat) | |
}) | |
names(sc.plot) <- celltypes | |
HorizontalCoveragePlot(data = data_full, | |
region = select.roi, | |
gname = gname, | |
extend.upstream = 5000, | |
extend.downstream = 5000, | |
group.by = "predicted.celltype.l1", | |
idents = c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC"), | |
state.plot = "~/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/", | |
sc.state.plot = sc.plot, | |
marks = c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "apol2", "pol2"), | |
window = 2000 | |
) | |
} | |
PlotThings <- function(data, loci, gname, | |
group.by="predicted.celltype.l1", | |
window=500, | |
idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK")) { | |
p6 <- CoveragePlot( | |
object = data[["asap"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('ATAC') | |
p7 <- CoveragePlot( | |
object = data[["k27me3"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('H3K27me3') | |
p1 <- CoveragePlot( | |
object = data[["k4me1"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('H3K4me1') | |
p2 <- CoveragePlot( | |
object = data[["k4me2"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('H3K4me2') | |
p3 <- CoveragePlot( | |
object = data[["k4me3"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('H3K4me3') | |
p4 <- CoveragePlot( | |
object = data[["k27ac"]], | |
region = loci, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
) + ggtitle('H3K27ac') | |
p5 <- AnnotationPlot( | |
object = data[["k27ac"]], | |
region = loci | |
) | |
expression.plot <- ExpressionPlot( | |
object = data[["rna"]], | |
features = gname, | |
assay = "RNA", | |
group.by = group.by, | |
idents = idents, | |
) | |
plotlist = list(p6, p1, p2, p3, p4, p7, p5) | |
# remove x-axis from all but last plot | |
for (i in 1:(length(x = plotlist) - 1)) { | |
plotlist[[i]] <- plotlist[[i]] + theme( | |
axis.title.x = element_blank(), | |
axis.text.x = element_blank(), | |
axis.line.x.bottom = element_blank(), | |
axis.ticks.x.bottom = element_blank() | |
) | |
} | |
heights = c(10, 10, 10, 10, 10, 10, 1) | |
widths = c(10, 1) | |
# align expression plot with the first element in plot list | |
plotlist[[1]] <- (plotlist[[1]] + expression.plot) + | |
plot_layout(widths = widths) | |
plotlist[[2]] <- (plotlist[[2]] + expression.plot) + | |
plot_layout(widths = widths) | |
plotlist[[3]] <- (plotlist[[3]] + expression.plot) + | |
plot_layout(widths = widths) | |
plotlist[[4]] <- (plotlist[[4]] + expression.plot) + | |
plot_layout(widths = widths) | |
plotlist[[5]] <- (plotlist[[5]] + expression.plot) + | |
plot_layout(widths = widths) | |
plotlist[[6]] <- (plotlist[[6]] + expression.plot) + | |
plot_layout(widths = widths) | |
p2 <- wrap_plots(plotlist[1:5], | |
heights = heights[1:5], | |
byrow = T | |
) | |
p <- plotlist[[6]] + plotlist[[7]] + guide_area() + plot_layout( | |
ncol = 2, heights = c(10, 2), | |
guides = "collect") | |
design <- "B | |
A" | |
wrap_plots(B = p2, A = p, design = design, heights = c(50, 10)) | |
} | |
HorizontalCoverageTrack <- function( | |
cutmat, | |
region, | |
group.scale.factors, | |
scale.factor, | |
obj.groups, | |
ymax, | |
downsample.rate, | |
window = 100, | |
max.downsample = 3000, | |
tile = NULL, | |
mark, | |
color | |
) { | |
window.size <- width(x = region) | |
levels.use <- levels(x = obj.groups) | |
coverages <- Signac:::ApplyMatrixByGroup( | |
mat = cutmat, | |
fun = colSums, | |
groups = obj.groups, | |
group.scale.factors = group.scale.factors, | |
scale.factor = scale.factor, | |
normalize = TRUE | |
) | |
if (!is.na(x = window)) { | |
coverages <- group_by(.data = coverages, group) | |
coverages <- mutate(.data = coverages, coverage = roll_sum( | |
x = norm.value, n = window, fill = NA, align = "center" | |
)) | |
coverages <- ungroup(x = coverages) | |
} else { | |
coverages$coverage <- coverages$norm.value | |
} | |
chromosome <- as.character(x = seqnames(x = region)) | |
start.pos <- start(x = region) | |
end.pos <- end(x = region) | |
coverages <- coverages[!is.na(x = coverages$coverage), ] | |
coverages <- group_by(.data = coverages, group) | |
sampling <- min(max.downsample, window.size * downsample.rate) | |
coverages <- slice_sample(.data = coverages, n = sampling) | |
# restore factor levels | |
if (!is.null(x = levels.use)) { | |
colors_all <- hue_pal()(length(x = levels.use)) | |
names(x = colors_all) <- levels.use | |
coverages$group <- factor(x = coverages$group, levels = levels.use) | |
} | |
if (is.null(ymax)) { | |
ymax <- signif(x = max(coverages$coverage, na.rm = TRUE), digits = 2) | |
} | |
ymin <- 0 | |
gr <- GRanges( | |
seqnames = chromosome, | |
IRanges(start = start.pos, end = end.pos) | |
) | |
if (!is.null(tile)) { | |
diffs <- coverages$position - tile | |
coverages$tile <- coverages$position[which.min(diffs)] | |
} | |
num.groups <- length(obj.groups) | |
p <- ggplot( | |
data = coverages, | |
mapping = aes(x = position, y = coverage, fill = group) | |
) + geom_area(stat = "identity") + | |
geom_hline(yintercept = 0, size = 0.1) + | |
geom_vline(xintercept = tile, size = 0.1) + | |
scale_fill_manual(values = rep(color, num.groups)) + | |
facet_wrap(facets = ~group, strip.position = "top", nrow = 1) + | |
xlab(label = paste0(chromosome, " position (bp)")) + | |
ylab(label = paste0(mark)#, " \n(range ", | |
#as.character(x = ymin), " - ", | |
#as.character(x = ymax), ")") | |
) + | |
ylim(c(ymin, ymax)) + | |
theme_browser(legend = FALSE) + | |
theme(axis.text.x = element_text(angle = 90), | |
strip.text = element_text(size = 20), | |
#axis.title.y = element_text(size = 16) | |
panel.spacing = unit(1, "lines") | |
) | |
if (!is.null(x = levels.use)) { | |
p <- p + scale_fill_manual(values = colors_all) | |
} | |
return(p) | |
} | |
HorizontalExpressionPlot <- function( | |
object, | |
features, | |
assay = NULL, | |
group.by = NULL, | |
idents = NULL, | |
slot = "data" | |
) { | |
# get data | |
assay <- DefaultAssay(object = object) | |
data.plot <- GetAssayData( | |
object = object, | |
assay = assay, | |
slot = slot | |
)[features, ] | |
obj.groups <- Signac:::GetGroups( | |
object = object, | |
group.by = group.by, | |
idents = NULL | |
) | |
# if levels set, define colors based on all groups | |
levels.use <- levels(x = obj.groups) | |
if (!is.null(x = levels.use)) { | |
colors_all <- hue_pal()(length(x = levels.use)) | |
names(x = colors_all) <- levels.use | |
} | |
if (!is.null(x = idents)) { | |
cells.keep <- names(x = obj.groups)[ | |
fmatch(x = obj.groups, table = idents, nomatch = 0L) > 0 | |
] | |
if (length(x = features) > 1) { | |
data.plot <- data.plot[, cells.keep] | |
} else { | |
data.plot <- data.plot[cells.keep] | |
} | |
obj.groups <- obj.groups[cells.keep] | |
} | |
# construct data frame | |
if (length(x = features) == 1) { | |
df <- data.frame( | |
gene = features, | |
expression = data.plot, | |
group = obj.groups | |
) | |
} else { | |
df <- data.frame() | |
for (i in features) { | |
df.1 <- data.frame( | |
gene = i, | |
expression = data.plot[i, ], | |
group = obj.groups | |
) | |
df <- rbind(df, df.1) | |
} | |
} | |
p.list <- list() | |
for (i in seq_along(along.with = features)) { | |
df.use <- df[df$gene == features[[i]], ] | |
p <- ggplot(data = df.use, aes(x = expression, y = gene, fill = group)) + | |
geom_violin(size = 1/4) + | |
facet_wrap(~group, nrow = 1, strip.position = "right") + | |
theme_classic() + | |
scale_y_discrete(position = "top") + | |
scale_x_continuous(position = "bottom", limits = c(0, NA)) + | |
theme( | |
axis.text.y = element_blank(), | |
axis.text.x = element_text(size = 8), | |
axis.title.x = element_blank(), | |
strip.background = element_blank(), | |
strip.text.y = element_blank(), | |
legend.position = "none", | |
panel.spacing = unit(1, "lines") | |
) | |
if (!is.null(x = levels.use)) { | |
p <- p + scale_fill_manual(values = colors_all) | |
} | |
p.list[[i]] <- p | |
} | |
p <- wrap_plots(p.list, ncol = length(x = p.list)) | |
return(p) | |
} | |
HorizontalSingleCoveragePlot <- function( | |
object, | |
region, | |
mark, | |
color, | |
twoh.tile.region = NULL, | |
features = NULL, | |
assay = NULL, | |
show.bulk = FALSE, | |
expression.assay = NULL, | |
expression.slot = "data", | |
annotation = TRUE, | |
peaks = TRUE, | |
ranges = NULL, | |
ranges.title = "Ranges", | |
links = TRUE, | |
tile = FALSE, | |
tile.size = 100, | |
tile.cells = 100, | |
group.by = NULL, | |
window = 100, | |
extend.upstream = 0, | |
extend.downstream = 0, | |
ymax = NULL, | |
scale.factor = NULL, | |
cells = NULL, | |
idents = NULL, | |
sep = c("-", "-"), | |
heights = NULL, | |
max.downsample = 3000, | |
downsample.rate = 0.1 | |
) { | |
cells <- colnames(x = object) | |
assay <- DefaultAssay(object = object) | |
if (!is.null(x = group.by)) { | |
Idents(object = object) <- group.by | |
} | |
if (!is.null(x = idents)) { | |
ident.cells <- WhichCells(object = object, idents = idents) | |
cells <- intersect(x = cells, y = ident.cells) | |
} | |
region <- Signac:::FindRegion( | |
object = object, | |
region = region, | |
sep = sep, | |
assay = assay, | |
extend.upstream = extend.upstream, | |
extend.downstream = extend.downstream | |
) | |
if (!is.null(twoh.tile.region)) { | |
twoh.tile.region <- start(twoh.tile.region) | |
} | |
reads.per.group <- AverageCounts( | |
object = object, | |
group.by = group.by, | |
verbose = FALSE | |
) | |
cells.per.group <- CellsPerGroup( | |
object = object, | |
group.by = group.by | |
) | |
cutmat <- Signac:::CutMatrix( | |
object = object, | |
region = region, | |
assay = assay, | |
cells = cells, | |
verbose = FALSE | |
) | |
colnames(cutmat) <- start(x = region):end(x = region) | |
group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group) | |
scale.factor <- median(x = group.scale.factors) | |
obj.groups <- Signac:::GetGroups( | |
object = object, | |
group.by = group.by, | |
idents = idents | |
) | |
p <- HorizontalCoverageTrack( | |
cutmat = cutmat, | |
region = region, | |
group.scale.factors = group.scale.factors, | |
scale.factor = scale.factor, | |
window = window, | |
ymax = ymax, | |
obj.groups = obj.groups, | |
downsample.rate = downsample.rate, | |
max.downsample = max.downsample, | |
mark = mark, | |
color = color, | |
tile = twoh.tile.region | |
) | |
if (!is.null(x = features)) { | |
ex.plot <- HorizontalExpressionPlot( | |
object = object, | |
features = features, | |
assay = expression.assay, | |
idents = idents, | |
group.by = group.by, | |
slot = expression.slot | |
) | |
ex.plot | |
widths <- c(10, length(x = features)) | |
} else { | |
ex.plot <- NULL | |
widths <- NULL | |
} | |
if (annotation) { | |
gene.plot <- AnnotationPlot(object = object, region = region) | |
} else { | |
gene.plot <- NULL | |
} | |
if (links) { | |
link.plot <- LinkPlot(object = object, region = region) | |
} else { | |
link.plot <- NULL | |
} | |
if (peaks) { | |
peak.plot <- PeakPlot(object = object, region = region) | |
} else { | |
peak.plot <- NULL | |
} | |
if (!is.null(x = ranges)) { | |
range.plot <- PeakPlot( | |
object = object, | |
region = region, | |
peaks = ranges, | |
color = "brown3") + | |
ylab(ranges.title) | |
} else { | |
range.plot <- NULL | |
} | |
if (tile) { | |
# reuse cut matrix | |
tile.df <- ComputeTile( | |
cutmatrix = cutmat, | |
groups = obj.groups, | |
window = tile.size, | |
n = tile.cells, | |
order = "total" | |
) | |
tile.plot <- CreateTilePlot( | |
df = tile.df, | |
n = tile.cells | |
) | |
} else { | |
tile.plot <- NULL | |
} | |
if (show.bulk) { | |
object$bulk <- "All cells" | |
reads.per.group <- AverageCounts( | |
object = object, | |
group.by = "bulk", | |
verbose = FALSE | |
) | |
cells.per.group <- CellsPerGroup( | |
object = object, | |
group.by = "bulk" | |
) | |
bulk.scale.factor <- suppressWarnings(reads.per.group * cells.per.group) | |
bulk.groups <- rep(x = "All cells", length(x = obj.groups)) | |
names(x = bulk.groups) <- names(x = obj.groups) | |
bulk.plot <- HorizontalCoverageTrack( | |
cutmat = cutmat, | |
region = region, | |
group.scale.factors = bulk.scale.factor, | |
scale.factor = scale.factor, | |
window = window, | |
ymax = ymax, | |
obj.groups = bulk.groups, | |
downsample.rate = downsample.rate, | |
max.downsample = max.downsample | |
) + | |
scale_fill_manual(values = "grey") + | |
ylab("") | |
} else { | |
bulk.plot <- NULL | |
} | |
nident <- length(x = unique(x = obj.groups)) | |
heights <- c(10, (1 / nident) * 10, 10, 2, 1, 1, 3) | |
p <- CombineTracks( | |
plotlist = list(p, bulk.plot, tile.plot, gene.plot, | |
peak.plot, range.plot, link.plot), | |
expression.plot = ex.plot, | |
heights = heights, | |
widths = widths | |
) | |
return(p) | |
} | |
gg_color_hue <- function(n) { | |
hues = seq(15, 375, length = n + 1) | |
hcl(h = hues, l = 65, c = 100)[1:n] | |
} | |
HorizontalMotifPlot <- function(object, region, motif) { | |
label = motif | |
motif.names <- object@assays$tiles@[email protected] | |
motif <- names(motif.names[grep(motif, motif.names)]) | |
if (length(motif) == 0) { print("ERROR: can't find the motif name") } | |
mpeaks <- object$tiles@motifs@data[, motif] | |
mpeaks <- mpeaks[mpeaks > 0] | |
mpeaks.ranges <- StringToGRanges(names(mpeaks)) | |
if (!inherits(x = region, what = "GRanges")) { | |
region <- StringToGRanges(regions = region) | |
} | |
motif.peaks <-GRangesToString(subsetByOverlaps(mpeaks.ranges, region)) | |
all.peaks <- granges(x = object) | |
peak.intersect <- subsetByOverlaps(x = all.peaks, ranges = region) | |
peak.df <- as.data.frame(x = peak.intersect) | |
peak.df$ctype <-"dimgrey" | |
rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end) | |
peak.df[motif.peaks, ]$ctype <- "#F8766D" | |
start.pos <- start(x = region) | |
end.pos <- end(x = region) | |
chromosome <- seqnames(x = region) | |
if (nrow(x = peak.df) > 0) { | |
peak.df$start[peak.df$start < start.pos] <- start.pos | |
peak.df$end[peak.df$end > end.pos] <- end.pos | |
peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) + | |
geom_segment(aes(x = start, y = 0, xend = end, yend = 0), | |
size = 2, | |
data = peak.df) | |
} else { | |
# no peaks present in region, make empty panel | |
peak.plot <- ggplot(data = peak.df) | |
} | |
peak.plot <- peak.plot + theme_classic() + | |
ylab(label = label) + | |
theme(axis.ticks.y = element_blank(), | |
axis.ticks.x = element_blank(), | |
axis.text.y = element_blank(), | |
legend.position = "none", | |
axis.title.x = element_blank(), | |
axis.text.x = element_blank() | |
) + | |
#xlab(label = paste0(chromosome, " position (bp)")) + | |
xlim(c(start.pos, end.pos)) + | |
scale_color_identity() | |
return(peak.plot) | |
} | |
HorizontalTilePlot <- function(object, region, tile) { | |
label = "tile" | |
tile.ranges <- StringToGRanges(tile) | |
if (!inherits(x = region, what = "GRanges")) { | |
region <- StringToGRanges(regions = region) | |
} | |
#peak.df <- as.data.frame(x = region) | |
#peak.df$ctype <- "dimgrey" | |
peak.df <- as.data.frame(x = tile.ranges) | |
peak.df$ctype <- "dimgrey" | |
#peak.df <- rbind(peak.df, tile.df) | |
#rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end) | |
start.pos <- start(x = region) | |
end.pos <- end(x = region) | |
chromosome <- seqnames(x = region) | |
if (nrow(x = peak.df) > 0) { | |
peak.df$start[peak.df$start < start.pos] <- start.pos | |
peak.df$end[peak.df$end > end.pos] <- end.pos | |
peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) + | |
geom_segment(aes(x = start, y = 0, xend = end, yend = 0), | |
size = 10, | |
data = peak.df) | |
} else { | |
# no peaks present in region, make empty panel | |
peak.plot <- ggplot(data = peak.df) | |
} | |
peak.plot <- peak.plot + theme_classic() + | |
ylab(label = label) + | |
theme(axis.ticks.y = element_blank(), | |
axis.ticks.x = element_blank(), | |
axis.text.y = element_blank(), | |
legend.position = "none", | |
axis.title.x = element_blank(), | |
axis.text.x = element_blank() | |
) + | |
#xlab(label = paste0(chromosome, " position (bp)")) + | |
xlim(c(start.pos, end.pos)) + | |
scale_color_identity() | |
return(peak.plot) | |
} | |
HorizontalCoveragePlot <- function(data, region, | |
gname=NULL, | |
pname=NULL, | |
motif = NULL, | |
tile = NULL, | |
state.plot = NULL, | |
sc.state.plot = NULL, | |
time.plot = F, | |
group.by="predicted.celltype.l1", | |
window=500, | |
idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK"), | |
extend.upstream = 0, | |
extend.downstream = 0, | |
marks = c("asap", "k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "k9me3") | |
) { | |
num.marks <- length(marks) | |
colors <- gg_color_hue(num.marks) | |
region.range <- Signac:::FindRegion( | |
object = data[["k27me3"]], | |
region = region, | |
extend.upstream = extend.upstream, | |
extend.downstream = extend.downstream | |
) | |
region <- GRangesToString(region.range) | |
marks.map <- c("ATAC", "H3K4me1", "H3K4me2", "H3K4me3", "H3K27me3", "H3K27ac", "PolII", "active_PolII", "H3K9me3") | |
names(marks.map) <- c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2", "k9me3") | |
if (!is.null(tile)) { | |
tile = StringToGRanges(tile) | |
} | |
plotlist <- lapply(seq(num.marks), function(mark.id) { | |
mark <- marks[[mark.id]] | |
HorizontalSingleCoveragePlot( object = data[[mark]], | |
region = region, | |
peaks = F, | |
window = window, | |
annotation = F, | |
group.by = group.by, | |
idents = idents, | |
twoh.tile.region = tile, | |
mark = marks.map[[mark]], | |
color = colors[[mark.id]] | |
) | |
}) | |
for (i in 1:(length(x = plotlist))) { | |
plotlist[[i]] <- plotlist[[i]] + theme( | |
axis.title.x = element_blank(), | |
axis.text.x = element_blank(), | |
axis.line.x.bottom = element_blank(), | |
axis.ticks.x.bottom = element_blank() | |
) | |
} | |
for (i in 2:(length(x = plotlist))) { | |
plotlist[[i]] <- plotlist[[i]] + theme( | |
strip.text.x = element_blank() | |
) | |
} | |
gene_plot <- AnnotationPlot( | |
object = data[["k27me3"]], | |
region = region | |
) | |
if (!is.null(gname)) { | |
exp.obj <- data[["rna"]] | |
DefaultAssay(exp.obj) <- "RNA" | |
expr_plot <- lapply(gname, function(feat) | |
HorizontalExpressionPlot( | |
object = exp.obj, | |
features = feat, | |
assay = "RNA", | |
group.by = group.by, | |
idents = idents | |
) | |
) | |
expr_plot <- wrap_plots(expr_plot, nrow = length(gname)) | |
} else if (!is.null(pname)) { | |
expr_plot <- lapply(marks, function(feat) { | |
exp.obj <- data[[feat]] | |
DefaultAssay(exp.obj) <- "ADT" | |
if (feat == "asap") { | |
if (pname == "CD57") { | |
pname <- "CD57-Recombinant" | |
} | |
ProteinPlot( | |
object = exp.obj, | |
name = marks.map[[feat]], | |
features = pname, | |
assay = "ADT", | |
group.by = group.by, | |
idents = idents | |
) | |
} else { | |
ProteinPlot( | |
object = exp.obj, | |
name = marks.map[[feat]], | |
features = pname, | |
assay = "ADT", | |
group.by = group.by, | |
idents = idents | |
) | |
} | |
}) | |
expr_plot <- wrap_plots(expr_plot, nrow = length(pname)) | |
} else { | |
print("ERROR: must provide gname or pname") | |
return (NULL) | |
} | |
num.idents <- length(idents) | |
gene_plots <- lapply(seq(num.idents), function(x) gene_plot) | |
gene_plots[[1]] <- gene_plots[[1]] + theme( | |
axis.text.x = element_text(angle = 15), | |
) | |
for (i in 2:length(gene_plots)) { | |
gene_plots[[i]] <- gene_plots[[i]] + theme( | |
axis.text.y=element_blank(), | |
axis.text.x = element_text(angle = 15), | |
axis.line.y=element_blank(), | |
axis.title.y=element_blank(), | |
panel.spacing = unit(2, "lines") | |
) | |
} | |
gene_plots <- wrap_plots(gene_plots, nrow = 1) | |
if (!is.null(motif)) { | |
motif.plot <- HorizontalMotifPlot(object = data[["k4me1"]], | |
region = region.range, | |
motif = motif) | |
motif_plots <- lapply(seq(num.idents), function(x) motif.plot) | |
for (i in 2:length(motif_plots)) { | |
motif_plots[[i]] <- motif_plots[[i]] + theme( | |
axis.text.y=element_blank(), | |
axis.line.y=element_blank(), | |
axis.title.y=element_blank() | |
) | |
} | |
gene_plots <- wrap_plots(motif_plots, nrow = 1) / gene_plots | |
} | |
#if (!is.null(tile)) { | |
# tile.plot <- HorizontalTilePlot(object = data[["k4me1"]], | |
# region = region.range, | |
# tile = tile) | |
# tile_plots <- lapply(seq(num.idents), function(x) tile.plot) | |
# | |
# for (i in 2:length(tile_plots)) { | |
# tile_plots[[i]] <- tile_plots[[i]] + theme( | |
# axis.text.y=element_blank(), | |
# axis.line.y=element_blank(), | |
# axis.title.y=element_blank() | |
# ) | |
# } | |
# | |
# gene_plots <- wrap_plots(tile_plots, nrow = 1) / gene_plots | |
#} | |
if (!is.null(state.plot)) { | |
splots <- lapply(idents, function(x) StatePlot(state.plot, region.range, gsub(" ", "_", x))) | |
for (i in 2:length(splots)) { | |
splots[[i]] <- splots[[i]] + theme( | |
axis.text.y=element_blank(), | |
axis.line.y=element_blank(), | |
axis.title.y=element_blank(), | |
axis.ticks.y=element_blank() | |
) | |
} | |
splots <- wrap_plots(splots, nrow = 1) | |
} | |
if (!is.null(sc.state.plot)) { | |
sc.splots <- lapply(idents, function(x) SCStatePlot(sc.state.plot, x)) | |
for (i in 2:length(sc.splots)) { | |
sc.splots[[i]] <- sc.splots[[i]] + theme( | |
axis.text.y=element_blank(), | |
axis.line.y=element_blank(), | |
axis.title.y=element_blank(), | |
axis.ticks.y=element_blank() | |
) | |
} | |
sc.splots <- wrap_plots(sc.splots, nrow = 1) | |
} | |
if (!time.plot) { | |
if(is.null(state.plot)) { | |
plotlist[[num.marks + 1]] <- gene_plots | |
plotlist[[num.marks + 2]] <- expr_plot | |
wrap_plots(plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10)) | |
} else if(!is.null(sc.state.plot)) { | |
plotlist[[num.marks + 1]] <- gene_plots | |
plotlist[[num.marks + 2]] <- splots | |
plotlist[[num.marks + 3]] <- sc.splots | |
plotlist[[num.marks + 4]] <- expr_plot | |
wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10, 10)) | |
} else { | |
plotlist[[num.marks + 1]] <- gene_plots | |
plotlist[[num.marks + 2]] <- splots | |
plotlist[[num.marks + 3]] <- expr_plot | |
wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10)) | |
} | |
} else { | |
wrap_plots( list( | |
expr_plot[[1]], | |
expr_plot[[2]], | |
expr_plot[[3]], | |
expr_plot[[4]], | |
expr_plot[[5]], | |
expr_plot[[6]], | |
expr_plot[[7]] | |
), | |
nrow = 7, | |
heights = c(rep(10, 7)) | |
) | |
} | |
} | |
ProteinPlot <- function( | |
object, | |
name, | |
features, | |
assay = NULL, | |
group.by = NULL, | |
idents = NULL | |
) { | |
print(name) | |
ptime <- [email protected][, group.by, drop=F] | |
ptime <- ptime[ptime[, 1] %in% idents, , drop=F] | |
#olaps <- data.frame(findOverlaps(StringToGRanges(rownames(object[[assay]]@data)), features)) | |
#features <- olaps$queryHits | |
ptime$cts <- colSums(object[[assay]]@data[features, , drop=F])[rownames(ptime)] | |
ptime$time <- object$pseudo.time[rownames(ptime)] | |
colnames(ptime) <- c("round.ptime", "counts", "pseudo.time") | |
#ptime <- ptime[ptime$counts > 0, ] | |
#ptime$categ <- 0 | |
#ptime[ptime$round.ptime == idents[2], ]$categ <- 1 | |
#ptime[ptime$round.ptime == idents[3], ]$categ <- 2 | |
#print(head(ptime)) | |
fit = mgcv::gam(counts ~ s(pseudo.time, k=50), data = ptime) | |
newx = data.frame(pseudo.time=with(ptime, seq(min(pseudo.time), max(pseudo.time), len=100))) | |
pred = predict(fit, newdata=newx, se=T) | |
pred = cbind(pseudo.time=newx, counts=pred$fit) | |
pred.range <- max(pred$counts) - min(pred$counts) | |
actual.range <- max(ptime$counts) - min(ptime$counts) | |
norm <- actual.range / pred.range | |
pred$counts <- pred$counts - min(pred$counts) | |
pred$counts <- pred$counts * norm | |
ptime$round.ptime <- factor(ptime$round.ptime, levels=idents) | |
p <- ggplot(ptime, aes(x=pseudo.time, y=counts)) + | |
geom_point(aes(color=round.ptime)) | |
p_smooth <- geom_smooth(data=ptime, se=F, fullrange=TRUE, | |
method = "gam", formula = y ~ s(x, k = 100)) | |
#p_smooth <- geom_smooth(data=pred, stat='identity') | |
p <- p + p_smooth + | |
scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
theme_bw() + | |
theme(panel.border = element_blank(), | |
#panel.grid.major = element_blank(), | |
#panel.grid.minor = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.title.x = element_blank(), | |
axis.line.x = element_blank(), | |
axis.line = element_line(colour = "black")) + | |
ylab(label = name) + | |
theme( | |
axis.title.y = element_text(size = 16) | |
) + | |
theme(legend.position = "none") | |
return(p) | |
} | |
SCStatePlot <- function( | |
matrices, | |
celltype, | |
show.legend = F | |
) { | |
df <- matrices[[celltype]] | |
df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9 | |
#df$E4 <- NULL | |
df$E6 <- NULL | |
df$E7 <- NULL | |
df$E8 <- NULL | |
df$E9 <- NULL | |
colnames(df)[[4]] <- "NA" | |
df$E1 <- df$E1 + df$E2 | |
df$E2 <- NULL | |
df$E3 <- df$E3 + df$E5 | |
df$E5 <- NULL | |
df$E11 <- df$E11 + df$E12 | |
df$E12 <- NULL | |
#colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter") | |
#df <- df[, c(1, 8, 10)] | |
#df <- df / rowSums(df) | |
df <- melt(as.matrix(df)) | |
p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) + | |
geom_bar(position="stack", stat="identity", width = 1) + | |
#scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
theme_bw() + | |
theme(panel.border = element_blank(), | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.title.x = element_blank(), | |
axis.line.x = element_blank(), | |
axis.ticks.x=element_blank(), | |
axis.line = element_line(colour = "black")) + | |
ylab(label = "State Probabilty") | |
if (!show.legend) { | |
p <- p + theme(legend.position = "none") | |
} | |
return(p) | |
} | |
StatePlot <- function( | |
base.path, | |
region, | |
celltype, | |
show.legend = F | |
) { | |
chr.name <- region@seqinfo@seqnames | |
#celltype <- gsub(" ", "_", celltype) | |
file.name <- paste0(base.path, celltype, "_12_", chr.name, "_posterior.txt") | |
print(file.exists(file.name)) | |
df <- read.table(file.name, skip = 1, header = T) | |
start <- region@ranges@start | |
end <- region@ranges@start + region@ranges@width | |
start <- start %/% 200 | |
end <- end %/% 200 | |
df <- df[start:end, ] | |
#df$E1 <- df$E1 + df$E2 + df$E3 | |
#df$E4 <- df$E4 + df$E6 + df$E7 | |
df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9 | |
#df$E4 <- NULL | |
df$E6 <- NULL | |
df$E7 <- NULL | |
df$E8 <- NULL | |
df$E9 <- NULL | |
colnames(df)[[4]] <- "NA" | |
df$E1 <- df$E1 + df$E2 | |
df$E2 <- NULL | |
df$E3 <- df$E3 + df$E5 | |
df$E5 <- NULL | |
df$E11 <- df$E11 + df$E12 | |
df$E12 <- NULL | |
#colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter") | |
#df <- df[, c(1, 8, 10)] | |
#df <- df / rowSums(df) | |
df <- melt(as.matrix(df)) | |
p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) + | |
geom_bar(position="stack", stat="identity", width = 1) + | |
#scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
theme_bw() + | |
theme(panel.border = element_blank(), | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.title.x = element_blank(), | |
axis.line.x = element_blank(), | |
axis.ticks.x=element_blank(), | |
axis.line = element_line(colour = "black")) + | |
ylab(label = "State Probabilty") | |
if (!show.legend) { | |
p <- p + theme(legend.position = "none") | |
} | |
return(p) | |
} | |
GetCoverage <- function( | |
object, | |
region, | |
group.by, | |
window = 500, | |
extend.upstream = 0, | |
extend.downstream = 0, | |
idents = NULL, | |
max.downsample = 3000, | |
downsample.rate = 0.1 | |
) { | |
cells <- colnames(x = object) | |
assay <- DefaultAssay(object = object) | |
if (!is.null(x = group.by)) { | |
Idents(object = object) <- group.by | |
} | |
if (!is.null(x = idents)) { | |
ident.cells <- WhichCells(object = object, idents = idents) | |
cells <- intersect(x = cells, y = ident.cells) | |
} | |
region <- Signac:::FindRegion( | |
object = object, | |
region = region, | |
assay = assay | |
) | |
reads.per.group <- AverageCounts( | |
object = object, | |
group.by = group.by, | |
verbose = FALSE | |
) | |
cells.per.group <- CellsPerGroup( | |
object = object, | |
group.by = group.by | |
) | |
cutmat <- Signac:::CutMatrix( | |
object = object, | |
region = region, | |
assay = assay, | |
cells = cells, | |
verbose = FALSE | |
) | |
colnames(cutmat) <- start(x = region):end(x = region) | |
group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group) | |
scale.factor <- median(x = group.scale.factors) | |
obj.groups <- Signac:::GetGroups( | |
object = object, | |
group.by = group.by, | |
idents = idents | |
) | |
window.size <- width(x = region) | |
levels.use <- levels(x = obj.groups) | |
coverages <- Signac:::ApplyMatrixByGroup( | |
mat = cutmat, | |
fun = colSums, | |
groups = obj.groups, | |
group.scale.factors = group.scale.factors, | |
scale.factor = scale.factor, | |
normalize = TRUE | |
) | |
if (!is.na(x = window)) { | |
coverages <- group_by(.data = coverages, group) | |
coverages <- mutate(.data = coverages, coverage = roll_sum( | |
x = norm.value, n = window, fill = NA, align = "center" | |
)) | |
coverages <- ungroup(x = coverages) | |
} else { | |
coverages$coverage <- coverages$norm.value | |
} | |
chromosome <- as.character(x = seqnames(x = region)) | |
start.pos <- start(x = region) | |
end.pos <- end(x = region) | |
coverages <- coverages[!is.na(x = coverages$coverage), ] | |
coverages <- group_by(.data = coverages, group) | |
sampling <- min(max.downsample, window.size * downsample.rate) | |
coverages <- slice_sample(.data = coverages, n = sampling) | |
# restore factor levels | |
if (!is.null(x = levels.use)) { | |
colors_all <- hue_pal()(length(x = levels.use)) | |
names(x = colors_all) <- levels.use | |
coverages$group <- factor(x = coverages$group, levels = levels.use) | |
} | |
coverages | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment