Skip to content

Instantly share code, notes, and snippets.

@k3yavi
Last active March 18, 2021 17:14
Show Gist options
  • Save k3yavi/cf71cbe753d57cb6acf1c1d34c013045 to your computer and use it in GitHub Desktop.
Save k3yavi/cf71cbe753d57cb6acf1c1d34c013045 to your computer and use it in GitHub Desktop.
suppressPackageStartupMessages({
library(Signac)
library(Seurat)
library(Matrix)
library(ggplot2)
library(patchwork)
library(dplyr)
library(RcppRoll)
library(fastmatch)
library(EnsDb.Hsapiens.v86)
library(reshape2)
sourceCpp("~/MyHmm.cpp")
})
# Usage
# data <- LoadData()
# idents <- factor(c("B", "CD4 T", "Mono"))
# HorizontalCoveragePlot(data = data,
# region = "PAX5",
# gname = "PAX5",
# extend.upstream = 5000,
# extend.downstream = 5000,
# state.plot = "/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/",
# idents = idents,
# marks = c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2")
# )
{
GetALLPosterior <- function(data, roi, num.threads=10) {
qregions <- GetTileRegions(roi)
print("extracting 200bp regions")
data <- GetRegionMatrix(data, qregions)
print("imputing")
imp.matrices <- ImputeCounts(data)
print("extracting tiles")
qregions <- GetTileRegions(roi, combined = F)
initialize <- data$hmm$init # rep(1.153589e-153, 12)
emission <- as.matrix(data$hmm$emission[names(data[1:5])])
transition <- as.matrix(data$hmm$transition)
print("calculating posterior")
GetThreadPosterior <- function(x){
range <- length(qregions) %/% num.threads
start <- x*range + 1
end <- (x+1)*range
probs <- lapply(seq(start, end), function(qid){
string.query <- GRangesToString(qregions[[qid]])
matrices <- lapply(names(data[1:5]), function(mark){
as.matrix(imp.matrices[[mark]]@assays$imputed@data[string.query, Cells(data$az.obj)])
})
posterior.probs <- posterior_fast(initialize, emission, transition, matrices)
posterior.probs <- lapply(posterior.probs, function(prob) {
rownames(prob) <- string.query
prob
})
names(posterior.probs) <- Cells(data$az.obj)
posterior.probs
})
probs
}
all.post <- bplapply(X = seq(0, num.threads-1), FUN = GetThreadPosterior)
all.post <- do.call(c, all.post)
all.post
}
GetPosterior <- function(data, qregion, imp.matrices, full=F) {
initialize <- data$hmm$init #rep(1.153589e-153, 12)
emission <- as.matrix(data$hmm$emission[names(data[1:5])])
transition <- as.matrix(data$hmm$transition)
string.query <- GRangesToString(qregion)
posterior.probs <- pblapply(Cells(data$az.obj), function(qcell.id){
observations <- lapply(names(data[1:5]), function(mark){
df <- data.frame(imp.matrices[[mark]]@assays$imputed@data[string.query, qcell.id])
df[is.na(df[1])] <- 0
df
})
observations <- as.matrix(do.call(cbind.data.frame, observations))
posterior.prob <- posterior(initialize, emission, transition, observations)
if (full) {
posterior.prob
} else {
unlist(apply(posterior.prob, 1, which.max))
}
})
names(posterior.probs) <- Cells(data$az.obj)
length(posterior.probs)
length(posterior.probs[[1]])
posterior.probs
}
GetPosteriorFast <- function(data, qregions, imp.matrices) {
initialize <- data$hmm$init #rep(1.153589e-153, 12)
emission <- as.matrix(data$hmm$emission[names(data[1:5])])
transition <- as.matrix(data$hmm$transition)
if (class(qregions) == "GRanges") {
matrices <- lapply(names(data[1:5]), function(mark){
as.matrix(imp.matrices[[mark]][GRangesToString(qregions), Cells(data$az.obj)])
})
all.probs <- posterior_fast(initialize, emission, transition, matrices)
names(all.probs) <- Cells(data$az.obj)
} else {
all.probs <- pblapply(qregions, function(qregion) {
string.query <- GRangesToString(qregion)
matrices <- lapply(names(data[1:5]), function(mark){
as.matrix(imp.matrices[[mark]][string.query, Cells(data$az.obj)])
})
posterior.probs <- posterior_fast(initialize, emission, transition, matrices)
names(posterior.probs) <- Cells(data$az.obj)
posterior.probs
})
}
all.probs
}
PlotStates <- function(roi.region.id, posterior.probs, az.obj, split=F, full.posterior=F) {
if (full.posterior) {
vals <- lapply(posterior.probs, function(probs) {
if (length(probs) >= roi.region.id) {
probs[roi.region.id, ]
} else {
rep(0.0, 12)
}
})
states <- do.call(rbind.data.frame, vals)
rownames(states) <- names(vals)
dim(states)
colnames(states) <- seq(12)
states$"1" <- states$"1" + states$"2"
states$"3" <- states$"3" + states$"4" + states$"5"
states$"9" <- states$"9" + states$"10" + states$"11" + states$"12"
states$"6" <- states$"6" + states$"7" + states$"8"
states$"2" <- NULL
states$"4" <- NULL
states$"5" <- NULL
states$"10" <- NULL
states$"11" <- NULL
states$"12" <- NULL
states$"7" <- NULL
states$"8" <- NULL
colnames(states) <- c("Promoter", "Enhancer", "NA", "Repression")
states <- states / rowSums(states)
az.obj <- data$az.obj
az.obj[["states"]] <- CreateAssayObject(t(states))
FeaturePlot(az.obj, reduction = "jumap", features = c("Promoter", "Enhancer", "Repression"), ncol = 3) &
scale_colour_gradientn(colours = rev(brewer.pal(n = 5, name = "RdBu")))
} else {
vals <- lapply(posterior.probs, function(probs) {
if (length(probs) >= roi.region.id) {
probs[[roi.region.id]]
} else {
0.0
}
})
states <- unlist(vals)
names(states) <- names(vals)
print(table(states))
az.obj$states <- states
az.obj$states[az.obj$states %in% c(1, 2)] <- "Promoter"
az.obj$states[az.obj$states %in% c(3, 4, 5)] <- "Enhancer"
az.obj$states[az.obj$states %in% c(9, 10, 11, 12)] <- "Repression"
az.obj$states[az.obj$states %in% c(0,6,7,8)] <- NA
az.obj$states <- factor(az.obj$states, levels = c("Promoter", "Enhancer", "Repression", "NA"))
if (split) {
DimPlot(az.obj, reduction = "jumap", group.by = "states", split.by = "states", ncol = 3)
} else {
DimPlot(az.obj, reduction = "jumap", group.by = "states", label = T, repel = T)
}
}
}
GetPlots <- function(roi.region.id, data, imp.matrices) {
plots <- lapply(names(data[1:5]), function(mark) {
obj <- data[[mark]]
DefaultAssay(obj) <- "region"
keep.cells <- obj@assays$region@data[rownames(obj@assays$region)[roi.region.id], ]
keep.cells <- names(keep.cells[keep.cells > 0])
DimPlot(obj, cells.highlight = keep.cells, reduction = "ref.umap") + NoLegend() + ggtitle(mark)
#FeaturePlot(obj, reduction = "ref.umap", features = rownames(obj@assays$region)[roi.region.id]) + NoLegend() + ggtitle(mark)
})
i.plots <- lapply(names(data[1:5]), function(mark) {
obj <- imp.matrices[[mark]]
keep.cells <- obj@assays$imputed@data[roi.region.id, ]
keep.cells <- names(keep.cells[keep.cells > 0])
# DimPlot(obj, cells.highlight = keep.cells, reduction = "jumap", group.by = "celltype.l2") + NoLegend() + ggtitle(mark)
FeaturePlot(obj, reduction = "jumap", features = rownames(obj@assays$imputed@data)[roi.region.id]) + ggtitle(paste0("imputed_", mark))
})
wrap_plots(wrap_plots(plots, ncol = 2), wrap_plots(i.plots, ncol = 2), ncol = 2)
}
ImputeCounts <- function(data, region.data) {
imp.mat <- lapply(names(data[1:5]), function(mark){
anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T)
mat <- region.data[[mark]][, rownames(anchors)]
imp.mat <- t(mat %*% anchors)
norm.mat <- colSums((anchors > 0)*1)
imp.mat <- imp.mat / norm.mat
imp.mat <- t(imp.mat)
missing.cells <- setdiff(Cells(data$az.obj), colnames(imp.mat))
if(length(missing.cells) > 0) {
zeros <- matrix(0, nrow = length(rownames(imp.mat)), ncol = length(missing.cells))
colnames(zeros) <- missing.cells
rownames(zeros) <- rownames(imp.mat)
imp.mat <- cbind(imp.mat, zeros)
}
imp.mat[, Cells(data$az.obj)]
#obj <- data$az.obj
#obj[["imputed"]] <- CreateAssayObject(imp.mat)
#obj
})
names(imp.mat) <- names(data[1:5])
imp.mat
}
ImputeVals <- function(data, feats) {
imp.mat <- lapply(names(data[1:5]), function(mark){
anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T)
mat <- data[[mark]]@assays$region@data[feats, rownames(anchors)]
imp.mat <- t(mat %*% anchors)
norm.mat <- colSums((anchors > 0)*1)
#norm.mat <- ((mat > 0)*1) %*% ((anchors > 0)*1)
imp.mat <- imp.mat / norm.mat
t(imp.mat)
})
names(imp.mat) <- names(data[1:5])
imp.mat
}
GetRegionMatrix <- function(data, qregion) {
region.mat <- lapply(names(data[1:5]), function(mark){
obj <- data[[mark]]
cells <- rownames(Matrix(as.matrix(data$anchors[[mark]]), sparse = T))
mat <- SingleFeatureMatrix(
obj@assays$tiles@fragments[[1]],
qregion,
cells = cells,
verbose = F
)
mat <- mat[, cells]
mat
})
names(region.mat) <- names(data[1:5])
region.mat
}
GetRegionMatrixOld <- function(data, qregion) {
region.mat <- lapply(data[1:5], function(obj){
mat <- SingleFeatureMatrix(
obj@assays$tiles@fragments[[1]],
qregion,
cells = Cells(obj),
verbose = F
)
mat
})
names(region.mat) <- names(data[1:5])
for (mark in names(data[1:5])) {
data[[mark]][["region"]] <- CreateAssayObject(region.mat[[mark]])
}
data
}
GetTileRegion <- function(gname, window=200, extend.upstream = 5000, extend.downstream = 5000) {
region <- Signac:::FindRegion(data[["k4me1"]], region = gname,
extend.upstream=extend.upstream,
extend.downstream=extend.downstream)
start <- region@ranges@start
end <- region@ranges@start + region@ranges@width
start <- start %/% window
end <- end %/% window
qregion <- StringToGRanges( paste0(as.character(region@seqnames@values), "-",
seq(start*window, end*window, window), "-",
seq(start*window + window, end*window + window, window)
)
)
qregion
}
GetTileRegions <- function(regions, window=200, extend.upstream = 5000, extend.downstream = 5000, combined=T) {
if (combined) {
ranges.string <- unlist(lapply(regions, function(region) {
region <- Signac:::FindRegion(region = region, extend.upstream=extend.upstream,
extend.downstream=extend.downstream)
start <- region@ranges@start
end <- region@ranges@start + region@ranges@width
start <- start %/% window
end <- end %/% window
qregion <- paste0(as.character(region@seqnames@values), "-", seq(start*window, end*window, window),
"-", seq(start*window + window, end*window + window, window))
qregion
}))
StringToGRanges(ranges.string)
} else{
lapply(regions, function(region) {
GetTileRegion(region)
})
}
}
GetModelParams <- function(file.path) {
init.model <- read.table(file.path, skip = 1, nrows = 12)
trans.model <- read.table(file.path, skip = 13, nrows = 144)
trans.model <- trans.model[, c("V2", "V3", "V4")]
trans.model <- dcast(trans.model, V2 ~ V3, value.var = 'V4')
trans.model$V2 <- NULL
emi.model <- read.table(file.path, skip = 157)
emi.model <- emi.model[emi.model$V5 == 1, c("V2", "V4", "V6")]
emi.model <- dcast(emi.model, V2 ~ V4, value.var = 'V6')
emi.model$V2 <- NULL
list(
"init" = init.model$V3,
"transition" = trans.model,
"emission" = emi.model
)
}
LoadData <- function() {
k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds")
k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds")
k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds")
k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds")
k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds")
k4me1.data$disc.ptime <- round(k4me1.data$pseudo.time, 1)
k4me2.data$disc.ptime <- round(k4me2.data$pseudo.time, 1)
k4me3.data$disc.ptime <- round(k4me3.data$pseudo.time, 1)
k27ac.data$disc.ptime <- round(k27ac.data$pseudo.time, 1)
k27me3.data$disc.ptime <- round(k27me3.data$pseudo.time, 1)
DefaultAssay(k4me1.data) <- "tiles"
DefaultAssay(k4me2.data) <- "tiles"
DefaultAssay(k4me3.data) <- "tiles"
DefaultAssay(k27ac.data) <- "tiles"
DefaultAssay(k27me3.data) <- "tiles"
hmm <- GetModelParams("/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/model_12.txt")
hmm$init <- hmm$init + 1.153589e-153
roi <- readRDS("~/test_rds/roi.rds")
roi <- GRangesToString(roi)
cell.types <- names(table(k4me1.data$predicted.celltype.l2))
cell.types <- setdiff(cell.types, c("CD4 Proliferating", "Eryth", "ILC", "Doublet", "NK_CD56bright"))
cell.types
data <- list(
k4me1 = k4me1.data,
k4me2 = k4me2.data,
k4me3 = k4me3.data,
k27ac = k27ac.data,
k27me3 = k27me3.data,
hmm = hmm,
roi = roi,
cell.types = cell.types,
az.obj = readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/anchors/small.azimuth.rds")
)
marks <- c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3")
anchors <- lapply(marks, function(mark){
anchor <- readRDS(paste0("~/v10_bingjie_cut_n_tag/seurat_objects/fwd_anchors/anchors.", mark, ".rds"))
df <- data.frame(anchor@anchors)
df$cell2 <- Cells(data[[mark]])[df$cell2]
df$cell1 <- Cells(data$az.obj)[df$cell1]
colnames(df) <- c("azimuth", "mark", "score")
#df <- df[data[[mark]]$predicted.celltype.l1[df$mark] == data$az.obj$celltype.l1[df$azimuth], ]
df <- df[df$score > 0, ]
score.mat <- dcast(df, mark ~ azimuth, value.var = 'score')
rownames(score.mat) <- score.mat$mark
score.mat$mark <- NULL
score.mat[is.na(score.mat)] <- 0
score.mat
})
names(anchors) <- marks
data[["anchors"]] <- anchors
data
}
LoadFullData <- function() {
k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds")
k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds")
k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds")
k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds")
k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds")
k9me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k9me3.rds")
poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_total.rds")
active.poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_ser2.rds")
asap.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/asap.rds")
rna.data <- readRDS("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/citeseq.rds")
DefaultAssay(k4me1.data) <- "tiles"
DefaultAssay(k4me2.data) <- "tiles"
DefaultAssay(k4me3.data) <- "tiles"
DefaultAssay(k27ac.data) <- "tiles"
DefaultAssay(k27me3.data) <- "tiles"
DefaultAssay(k9me3.data) <- "tiles"
DefaultAssay(poll2.data) <- "tiles"
DefaultAssay(active.poll2.data) <- "tiles"
DefaultAssay(asap.data) <- "ASAP"
list(
k4me1 = k4me1.data,
k4me2 = k4me2.data,
k4me3 = k4me3.data,
k27ac = k27ac.data,
k27me3 = k27me3.data,
k9me3 = k9me3.data,
pol2 = poll2.data,
apol2 = active.poll2.data,
asap = asap.data,
rna = rna.data
)
}
GetMaxIndex <- function(data, mark, cell.type, group.by, region.data) {
#print(table(data[[mark]]@meta.data[group.by]))
keep.cells <- rownames(data[[mark]]@meta.data[group.by][data[[mark]]@meta.data[group.by] == cell.type, , drop=F])
which.max(rowSums(region.data[[mark]][, keep.cells]))
}
SingleFeatureMatrix <- function(
fragment,
features,
cells = NULL,
process_n = 2000,
sep = c("-", "-"),
verbose = TRUE
) {
fragment.path <- GetFragmentData(object = fragment, slot = "path")
if (!is.null(cells)) {
# only look for cells that are in the fragment file
frag.cells <- GetFragmentData(object = fragment, slot = "cells")
# first subset frag.cells
cell.idx <- fmatch(
x = names(x = frag.cells),
table = cells,
nomatch = 0L
) > 0
cells <- frag.cells[cell.idx]
}
tbx <- TabixFile(file = fragment.path)
features <- keepSeqlevels(
x = features,
value = intersect(
x = seqnames(x = features),
y = seqnamesTabix(file = tbx)
),
pruning.mode = "coarse"
)
if (length(x = features) == 0) {
stop("No matching chromosomes found in fragment file.")
}
feature.list <- Signac:::ChunkGRanges(
granges = features,
nchunk = ceiling(x = length(x = features) / process_n)
)
if (verbose) {
message("Extracting reads overlapping genomic regions")
}
#if (nbrOfWorkers() > 1) {
mylapply <- bplapply
#} else {
# mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply)
#}
matrix.parts <- mylapply(
X = feature.list,
FUN = Signac:::PartialMatrix,
tabix = tbx,
cells = cells,
sep = sep
)
# remove any that are NULL (no fragments for any cells in the region)
null.parts <- sapply(X = matrix.parts, FUN = is.null)
matrix.parts <- matrix.parts[!null.parts]
if (is.null(x = cells)) {
all.cells <- unique(
x = unlist(x = lapply(X = matrix.parts, FUN = colnames))
)
matrix.parts <- lapply(
X = matrix.parts,
FUN = AddMissingCells,
cells = all.cells
)
}
featmat <- Reduce(f = rbind, x = matrix.parts)
if (!is.null(x = cells)) {
# cells supplied, rename with cell name from object rather than file
cell.convert <- names(x = cells)
names(x = cell.convert) <- cells
colnames(x = featmat) <- unname(obj = cell.convert[colnames(x = featmat)])
}
# reorder features
feat.str <- GRangesToString(grange = features)
featmat <- featmat[feat.str, ]
return(featmat)
}
PlotSCCoverage <- function(data, data_full, select.roi, gname) {
qregion <- GetTileRegion(select.roi, window=200,
extend.upstream = 5000,
extend.downstream = 5000)
region.data <- GetRegionMatrix(data, qregion)
imp.matrices <- ImputeCounts(data, region.data)
posterior.probs <- GetPosteriorFast(data, qregion, imp.matrices)
celltypes <- c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC")
sc.plot <- lapply(celltypes, function(ct) {
keep.cells <- names(data$az.obj$celltype.l1[data$az.obj$celltype.l1 == ct])
nmat <- Reduce("+", posterior.probs[keep.cells]) / length(keep.cells)
nmat <- nmat / rowSums(nmat)
colnames(nmat) <- paste0("E", seq(12))
data.frame(nmat)
})
names(sc.plot) <- celltypes
HorizontalCoveragePlot(data = data_full,
region = select.roi,
gname = gname,
extend.upstream = 5000,
extend.downstream = 5000,
group.by = "predicted.celltype.l1",
idents = c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC"),
state.plot = "~/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/",
sc.state.plot = sc.plot,
marks = c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "apol2", "pol2"),
window = 2000
)
}
PlotThings <- function(data, loci, gname,
group.by="predicted.celltype.l1",
window=500,
idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK")) {
p6 <- CoveragePlot(
object = data[["asap"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('ATAC')
p7 <- CoveragePlot(
object = data[["k27me3"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('H3K27me3')
p1 <- CoveragePlot(
object = data[["k4me1"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('H3K4me1')
p2 <- CoveragePlot(
object = data[["k4me2"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('H3K4me2')
p3 <- CoveragePlot(
object = data[["k4me3"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('H3K4me3')
p4 <- CoveragePlot(
object = data[["k27ac"]],
region = loci,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
) + ggtitle('H3K27ac')
p5 <- AnnotationPlot(
object = data[["k27ac"]],
region = loci
)
expression.plot <- ExpressionPlot(
object = data[["rna"]],
features = gname,
assay = "RNA",
group.by = group.by,
idents = idents,
)
plotlist = list(p6, p1, p2, p3, p4, p7, p5)
# remove x-axis from all but last plot
for (i in 1:(length(x = plotlist) - 1)) {
plotlist[[i]] <- plotlist[[i]] + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x.bottom = element_blank(),
axis.ticks.x.bottom = element_blank()
)
}
heights = c(10, 10, 10, 10, 10, 10, 1)
widths = c(10, 1)
# align expression plot with the first element in plot list
plotlist[[1]] <- (plotlist[[1]] + expression.plot) +
plot_layout(widths = widths)
plotlist[[2]] <- (plotlist[[2]] + expression.plot) +
plot_layout(widths = widths)
plotlist[[3]] <- (plotlist[[3]] + expression.plot) +
plot_layout(widths = widths)
plotlist[[4]] <- (plotlist[[4]] + expression.plot) +
plot_layout(widths = widths)
plotlist[[5]] <- (plotlist[[5]] + expression.plot) +
plot_layout(widths = widths)
plotlist[[6]] <- (plotlist[[6]] + expression.plot) +
plot_layout(widths = widths)
p2 <- wrap_plots(plotlist[1:5],
heights = heights[1:5],
byrow = T
)
p <- plotlist[[6]] + plotlist[[7]] + guide_area() + plot_layout(
ncol = 2, heights = c(10, 2),
guides = "collect")
design <- "B
A"
wrap_plots(B = p2, A = p, design = design, heights = c(50, 10))
}
HorizontalCoverageTrack <- function(
cutmat,
region,
group.scale.factors,
scale.factor,
obj.groups,
ymax,
downsample.rate,
window = 100,
max.downsample = 3000,
tile = NULL,
mark,
color
) {
window.size <- width(x = region)
levels.use <- levels(x = obj.groups)
coverages <- Signac:::ApplyMatrixByGroup(
mat = cutmat,
fun = colSums,
groups = obj.groups,
group.scale.factors = group.scale.factors,
scale.factor = scale.factor,
normalize = TRUE
)
if (!is.na(x = window)) {
coverages <- group_by(.data = coverages, group)
coverages <- mutate(.data = coverages, coverage = roll_sum(
x = norm.value, n = window, fill = NA, align = "center"
))
coverages <- ungroup(x = coverages)
} else {
coverages$coverage <- coverages$norm.value
}
chromosome <- as.character(x = seqnames(x = region))
start.pos <- start(x = region)
end.pos <- end(x = region)
coverages <- coverages[!is.na(x = coverages$coverage), ]
coverages <- group_by(.data = coverages, group)
sampling <- min(max.downsample, window.size * downsample.rate)
coverages <- slice_sample(.data = coverages, n = sampling)
# restore factor levels
if (!is.null(x = levels.use)) {
colors_all <- hue_pal()(length(x = levels.use))
names(x = colors_all) <- levels.use
coverages$group <- factor(x = coverages$group, levels = levels.use)
}
if (is.null(ymax)) {
ymax <- signif(x = max(coverages$coverage, na.rm = TRUE), digits = 2)
}
ymin <- 0
gr <- GRanges(
seqnames = chromosome,
IRanges(start = start.pos, end = end.pos)
)
if (!is.null(tile)) {
diffs <- coverages$position - tile
coverages$tile <- coverages$position[which.min(diffs)]
}
num.groups <- length(obj.groups)
p <- ggplot(
data = coverages,
mapping = aes(x = position, y = coverage, fill = group)
) + geom_area(stat = "identity") +
geom_hline(yintercept = 0, size = 0.1) +
geom_vline(xintercept = tile, size = 0.1) +
scale_fill_manual(values = rep(color, num.groups)) +
facet_wrap(facets = ~group, strip.position = "top", nrow = 1) +
xlab(label = paste0(chromosome, " position (bp)")) +
ylab(label = paste0(mark)#, " \n(range ",
#as.character(x = ymin), " - ",
#as.character(x = ymax), ")")
) +
ylim(c(ymin, ymax)) +
theme_browser(legend = FALSE) +
theme(axis.text.x = element_text(angle = 90),
strip.text = element_text(size = 20),
#axis.title.y = element_text(size = 16)
panel.spacing = unit(1, "lines")
)
if (!is.null(x = levels.use)) {
p <- p + scale_fill_manual(values = colors_all)
}
return(p)
}
HorizontalExpressionPlot <- function(
object,
features,
assay = NULL,
group.by = NULL,
idents = NULL,
slot = "data"
) {
# get data
assay <- DefaultAssay(object = object)
data.plot <- GetAssayData(
object = object,
assay = assay,
slot = slot
)[features, ]
obj.groups <- Signac:::GetGroups(
object = object,
group.by = group.by,
idents = NULL
)
# if levels set, define colors based on all groups
levels.use <- levels(x = obj.groups)
if (!is.null(x = levels.use)) {
colors_all <- hue_pal()(length(x = levels.use))
names(x = colors_all) <- levels.use
}
if (!is.null(x = idents)) {
cells.keep <- names(x = obj.groups)[
fmatch(x = obj.groups, table = idents, nomatch = 0L) > 0
]
if (length(x = features) > 1) {
data.plot <- data.plot[, cells.keep]
} else {
data.plot <- data.plot[cells.keep]
}
obj.groups <- obj.groups[cells.keep]
}
# construct data frame
if (length(x = features) == 1) {
df <- data.frame(
gene = features,
expression = data.plot,
group = obj.groups
)
} else {
df <- data.frame()
for (i in features) {
df.1 <- data.frame(
gene = i,
expression = data.plot[i, ],
group = obj.groups
)
df <- rbind(df, df.1)
}
}
p.list <- list()
for (i in seq_along(along.with = features)) {
df.use <- df[df$gene == features[[i]], ]
p <- ggplot(data = df.use, aes(x = expression, y = gene, fill = group)) +
geom_violin(size = 1/4) +
facet_wrap(~group, nrow = 1, strip.position = "right") +
theme_classic() +
scale_y_discrete(position = "top") +
scale_x_continuous(position = "bottom", limits = c(0, NA)) +
theme(
axis.text.y = element_blank(),
axis.text.x = element_text(size = 8),
axis.title.x = element_blank(),
strip.background = element_blank(),
strip.text.y = element_blank(),
legend.position = "none",
panel.spacing = unit(1, "lines")
)
if (!is.null(x = levels.use)) {
p <- p + scale_fill_manual(values = colors_all)
}
p.list[[i]] <- p
}
p <- wrap_plots(p.list, ncol = length(x = p.list))
return(p)
}
HorizontalSingleCoveragePlot <- function(
object,
region,
mark,
color,
twoh.tile.region = NULL,
features = NULL,
assay = NULL,
show.bulk = FALSE,
expression.assay = NULL,
expression.slot = "data",
annotation = TRUE,
peaks = TRUE,
ranges = NULL,
ranges.title = "Ranges",
links = TRUE,
tile = FALSE,
tile.size = 100,
tile.cells = 100,
group.by = NULL,
window = 100,
extend.upstream = 0,
extend.downstream = 0,
ymax = NULL,
scale.factor = NULL,
cells = NULL,
idents = NULL,
sep = c("-", "-"),
heights = NULL,
max.downsample = 3000,
downsample.rate = 0.1
) {
cells <- colnames(x = object)
assay <- DefaultAssay(object = object)
if (!is.null(x = group.by)) {
Idents(object = object) <- group.by
}
if (!is.null(x = idents)) {
ident.cells <- WhichCells(object = object, idents = idents)
cells <- intersect(x = cells, y = ident.cells)
}
region <- Signac:::FindRegion(
object = object,
region = region,
sep = sep,
assay = assay,
extend.upstream = extend.upstream,
extend.downstream = extend.downstream
)
if (!is.null(twoh.tile.region)) {
twoh.tile.region <- start(twoh.tile.region)
}
reads.per.group <- AverageCounts(
object = object,
group.by = group.by,
verbose = FALSE
)
cells.per.group <- CellsPerGroup(
object = object,
group.by = group.by
)
cutmat <- Signac:::CutMatrix(
object = object,
region = region,
assay = assay,
cells = cells,
verbose = FALSE
)
colnames(cutmat) <- start(x = region):end(x = region)
group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group)
scale.factor <- median(x = group.scale.factors)
obj.groups <- Signac:::GetGroups(
object = object,
group.by = group.by,
idents = idents
)
p <- HorizontalCoverageTrack(
cutmat = cutmat,
region = region,
group.scale.factors = group.scale.factors,
scale.factor = scale.factor,
window = window,
ymax = ymax,
obj.groups = obj.groups,
downsample.rate = downsample.rate,
max.downsample = max.downsample,
mark = mark,
color = color,
tile = twoh.tile.region
)
if (!is.null(x = features)) {
ex.plot <- HorizontalExpressionPlot(
object = object,
features = features,
assay = expression.assay,
idents = idents,
group.by = group.by,
slot = expression.slot
)
ex.plot
widths <- c(10, length(x = features))
} else {
ex.plot <- NULL
widths <- NULL
}
if (annotation) {
gene.plot <- AnnotationPlot(object = object, region = region)
} else {
gene.plot <- NULL
}
if (links) {
link.plot <- LinkPlot(object = object, region = region)
} else {
link.plot <- NULL
}
if (peaks) {
peak.plot <- PeakPlot(object = object, region = region)
} else {
peak.plot <- NULL
}
if (!is.null(x = ranges)) {
range.plot <- PeakPlot(
object = object,
region = region,
peaks = ranges,
color = "brown3") +
ylab(ranges.title)
} else {
range.plot <- NULL
}
if (tile) {
# reuse cut matrix
tile.df <- ComputeTile(
cutmatrix = cutmat,
groups = obj.groups,
window = tile.size,
n = tile.cells,
order = "total"
)
tile.plot <- CreateTilePlot(
df = tile.df,
n = tile.cells
)
} else {
tile.plot <- NULL
}
if (show.bulk) {
object$bulk <- "All cells"
reads.per.group <- AverageCounts(
object = object,
group.by = "bulk",
verbose = FALSE
)
cells.per.group <- CellsPerGroup(
object = object,
group.by = "bulk"
)
bulk.scale.factor <- suppressWarnings(reads.per.group * cells.per.group)
bulk.groups <- rep(x = "All cells", length(x = obj.groups))
names(x = bulk.groups) <- names(x = obj.groups)
bulk.plot <- HorizontalCoverageTrack(
cutmat = cutmat,
region = region,
group.scale.factors = bulk.scale.factor,
scale.factor = scale.factor,
window = window,
ymax = ymax,
obj.groups = bulk.groups,
downsample.rate = downsample.rate,
max.downsample = max.downsample
) +
scale_fill_manual(values = "grey") +
ylab("")
} else {
bulk.plot <- NULL
}
nident <- length(x = unique(x = obj.groups))
heights <- c(10, (1 / nident) * 10, 10, 2, 1, 1, 3)
p <- CombineTracks(
plotlist = list(p, bulk.plot, tile.plot, gene.plot,
peak.plot, range.plot, link.plot),
expression.plot = ex.plot,
heights = heights,
widths = widths
)
return(p)
}
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
HorizontalMotifPlot <- function(object, region, motif) {
label = motif
motif.names <- object@assays$tiles@[email protected]
motif <- names(motif.names[grep(motif, motif.names)])
if (length(motif) == 0) { print("ERROR: can't find the motif name") }
mpeaks <- object$tiles@motifs@data[, motif]
mpeaks <- mpeaks[mpeaks > 0]
mpeaks.ranges <- StringToGRanges(names(mpeaks))
if (!inherits(x = region, what = "GRanges")) {
region <- StringToGRanges(regions = region)
}
motif.peaks <-GRangesToString(subsetByOverlaps(mpeaks.ranges, region))
all.peaks <- granges(x = object)
peak.intersect <- subsetByOverlaps(x = all.peaks, ranges = region)
peak.df <- as.data.frame(x = peak.intersect)
peak.df$ctype <-"dimgrey"
rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end)
peak.df[motif.peaks, ]$ctype <- "#F8766D"
start.pos <- start(x = region)
end.pos <- end(x = region)
chromosome <- seqnames(x = region)
if (nrow(x = peak.df) > 0) {
peak.df$start[peak.df$start < start.pos] <- start.pos
peak.df$end[peak.df$end > end.pos] <- end.pos
peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) +
geom_segment(aes(x = start, y = 0, xend = end, yend = 0),
size = 2,
data = peak.df)
} else {
# no peaks present in region, make empty panel
peak.plot <- ggplot(data = peak.df)
}
peak.plot <- peak.plot + theme_classic() +
ylab(label = label) +
theme(axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
legend.position = "none",
axis.title.x = element_blank(),
axis.text.x = element_blank()
) +
#xlab(label = paste0(chromosome, " position (bp)")) +
xlim(c(start.pos, end.pos)) +
scale_color_identity()
return(peak.plot)
}
HorizontalTilePlot <- function(object, region, tile) {
label = "tile"
tile.ranges <- StringToGRanges(tile)
if (!inherits(x = region, what = "GRanges")) {
region <- StringToGRanges(regions = region)
}
#peak.df <- as.data.frame(x = region)
#peak.df$ctype <- "dimgrey"
peak.df <- as.data.frame(x = tile.ranges)
peak.df$ctype <- "dimgrey"
#peak.df <- rbind(peak.df, tile.df)
#rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end)
start.pos <- start(x = region)
end.pos <- end(x = region)
chromosome <- seqnames(x = region)
if (nrow(x = peak.df) > 0) {
peak.df$start[peak.df$start < start.pos] <- start.pos
peak.df$end[peak.df$end > end.pos] <- end.pos
peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) +
geom_segment(aes(x = start, y = 0, xend = end, yend = 0),
size = 10,
data = peak.df)
} else {
# no peaks present in region, make empty panel
peak.plot <- ggplot(data = peak.df)
}
peak.plot <- peak.plot + theme_classic() +
ylab(label = label) +
theme(axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
legend.position = "none",
axis.title.x = element_blank(),
axis.text.x = element_blank()
) +
#xlab(label = paste0(chromosome, " position (bp)")) +
xlim(c(start.pos, end.pos)) +
scale_color_identity()
return(peak.plot)
}
HorizontalCoveragePlot <- function(data, region,
gname=NULL,
pname=NULL,
motif = NULL,
tile = NULL,
state.plot = NULL,
sc.state.plot = NULL,
time.plot = F,
group.by="predicted.celltype.l1",
window=500,
idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK"),
extend.upstream = 0,
extend.downstream = 0,
marks = c("asap", "k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "k9me3")
) {
num.marks <- length(marks)
colors <- gg_color_hue(num.marks)
region.range <- Signac:::FindRegion(
object = data[["k27me3"]],
region = region,
extend.upstream = extend.upstream,
extend.downstream = extend.downstream
)
region <- GRangesToString(region.range)
marks.map <- c("ATAC", "H3K4me1", "H3K4me2", "H3K4me3", "H3K27me3", "H3K27ac", "PolII", "active_PolII", "H3K9me3")
names(marks.map) <- c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2", "k9me3")
if (!is.null(tile)) {
tile = StringToGRanges(tile)
}
plotlist <- lapply(seq(num.marks), function(mark.id) {
mark <- marks[[mark.id]]
HorizontalSingleCoveragePlot( object = data[[mark]],
region = region,
peaks = F,
window = window,
annotation = F,
group.by = group.by,
idents = idents,
twoh.tile.region = tile,
mark = marks.map[[mark]],
color = colors[[mark.id]]
)
})
for (i in 1:(length(x = plotlist))) {
plotlist[[i]] <- plotlist[[i]] + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x.bottom = element_blank(),
axis.ticks.x.bottom = element_blank()
)
}
for (i in 2:(length(x = plotlist))) {
plotlist[[i]] <- plotlist[[i]] + theme(
strip.text.x = element_blank()
)
}
gene_plot <- AnnotationPlot(
object = data[["k27me3"]],
region = region
)
if (!is.null(gname)) {
exp.obj <- data[["rna"]]
DefaultAssay(exp.obj) <- "RNA"
expr_plot <- lapply(gname, function(feat)
HorizontalExpressionPlot(
object = exp.obj,
features = feat,
assay = "RNA",
group.by = group.by,
idents = idents
)
)
expr_plot <- wrap_plots(expr_plot, nrow = length(gname))
} else if (!is.null(pname)) {
expr_plot <- lapply(marks, function(feat) {
exp.obj <- data[[feat]]
DefaultAssay(exp.obj) <- "ADT"
if (feat == "asap") {
if (pname == "CD57") {
pname <- "CD57-Recombinant"
}
ProteinPlot(
object = exp.obj,
name = marks.map[[feat]],
features = pname,
assay = "ADT",
group.by = group.by,
idents = idents
)
} else {
ProteinPlot(
object = exp.obj,
name = marks.map[[feat]],
features = pname,
assay = "ADT",
group.by = group.by,
idents = idents
)
}
})
expr_plot <- wrap_plots(expr_plot, nrow = length(pname))
} else {
print("ERROR: must provide gname or pname")
return (NULL)
}
num.idents <- length(idents)
gene_plots <- lapply(seq(num.idents), function(x) gene_plot)
gene_plots[[1]] <- gene_plots[[1]] + theme(
axis.text.x = element_text(angle = 15),
)
for (i in 2:length(gene_plots)) {
gene_plots[[i]] <- gene_plots[[i]] + theme(
axis.text.y=element_blank(),
axis.text.x = element_text(angle = 15),
axis.line.y=element_blank(),
axis.title.y=element_blank(),
panel.spacing = unit(2, "lines")
)
}
gene_plots <- wrap_plots(gene_plots, nrow = 1)
if (!is.null(motif)) {
motif.plot <- HorizontalMotifPlot(object = data[["k4me1"]],
region = region.range,
motif = motif)
motif_plots <- lapply(seq(num.idents), function(x) motif.plot)
for (i in 2:length(motif_plots)) {
motif_plots[[i]] <- motif_plots[[i]] + theme(
axis.text.y=element_blank(),
axis.line.y=element_blank(),
axis.title.y=element_blank()
)
}
gene_plots <- wrap_plots(motif_plots, nrow = 1) / gene_plots
}
#if (!is.null(tile)) {
# tile.plot <- HorizontalTilePlot(object = data[["k4me1"]],
# region = region.range,
# tile = tile)
# tile_plots <- lapply(seq(num.idents), function(x) tile.plot)
#
# for (i in 2:length(tile_plots)) {
# tile_plots[[i]] <- tile_plots[[i]] + theme(
# axis.text.y=element_blank(),
# axis.line.y=element_blank(),
# axis.title.y=element_blank()
# )
# }
#
# gene_plots <- wrap_plots(tile_plots, nrow = 1) / gene_plots
#}
if (!is.null(state.plot)) {
splots <- lapply(idents, function(x) StatePlot(state.plot, region.range, gsub(" ", "_", x)))
for (i in 2:length(splots)) {
splots[[i]] <- splots[[i]] + theme(
axis.text.y=element_blank(),
axis.line.y=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y=element_blank()
)
}
splots <- wrap_plots(splots, nrow = 1)
}
if (!is.null(sc.state.plot)) {
sc.splots <- lapply(idents, function(x) SCStatePlot(sc.state.plot, x))
for (i in 2:length(sc.splots)) {
sc.splots[[i]] <- sc.splots[[i]] + theme(
axis.text.y=element_blank(),
axis.line.y=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y=element_blank()
)
}
sc.splots <- wrap_plots(sc.splots, nrow = 1)
}
if (!time.plot) {
if(is.null(state.plot)) {
plotlist[[num.marks + 1]] <- gene_plots
plotlist[[num.marks + 2]] <- expr_plot
wrap_plots(plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10))
} else if(!is.null(sc.state.plot)) {
plotlist[[num.marks + 1]] <- gene_plots
plotlist[[num.marks + 2]] <- splots
plotlist[[num.marks + 3]] <- sc.splots
plotlist[[num.marks + 4]] <- expr_plot
wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10, 10))
} else {
plotlist[[num.marks + 1]] <- gene_plots
plotlist[[num.marks + 2]] <- splots
plotlist[[num.marks + 3]] <- expr_plot
wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10))
}
} else {
wrap_plots( list(
expr_plot[[1]],
expr_plot[[2]],
expr_plot[[3]],
expr_plot[[4]],
expr_plot[[5]],
expr_plot[[6]],
expr_plot[[7]]
),
nrow = 7,
heights = c(rep(10, 7))
)
}
}
ProteinPlot <- function(
object,
name,
features,
assay = NULL,
group.by = NULL,
idents = NULL
) {
print(name)
ptime <- [email protected][, group.by, drop=F]
ptime <- ptime[ptime[, 1] %in% idents, , drop=F]
#olaps <- data.frame(findOverlaps(StringToGRanges(rownames(object[[assay]]@data)), features))
#features <- olaps$queryHits
ptime$cts <- colSums(object[[assay]]@data[features, , drop=F])[rownames(ptime)]
ptime$time <- object$pseudo.time[rownames(ptime)]
colnames(ptime) <- c("round.ptime", "counts", "pseudo.time")
#ptime <- ptime[ptime$counts > 0, ]
#ptime$categ <- 0
#ptime[ptime$round.ptime == idents[2], ]$categ <- 1
#ptime[ptime$round.ptime == idents[3], ]$categ <- 2
#print(head(ptime))
fit = mgcv::gam(counts ~ s(pseudo.time, k=50), data = ptime)
newx = data.frame(pseudo.time=with(ptime, seq(min(pseudo.time), max(pseudo.time), len=100)))
pred = predict(fit, newdata=newx, se=T)
pred = cbind(pseudo.time=newx, counts=pred$fit)
pred.range <- max(pred$counts) - min(pred$counts)
actual.range <- max(ptime$counts) - min(ptime$counts)
norm <- actual.range / pred.range
pred$counts <- pred$counts - min(pred$counts)
pred$counts <- pred$counts * norm
ptime$round.ptime <- factor(ptime$round.ptime, levels=idents)
p <- ggplot(ptime, aes(x=pseudo.time, y=counts)) +
geom_point(aes(color=round.ptime))
p_smooth <- geom_smooth(data=ptime, se=F, fullrange=TRUE,
method = "gam", formula = y ~ s(x, k = 100))
#p_smooth <- geom_smooth(data=pred, stat='identity')
p <- p + p_smooth +
scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) +
theme_bw() +
theme(panel.border = element_blank(),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
axis.line = element_line(colour = "black")) +
ylab(label = name) +
theme(
axis.title.y = element_text(size = 16)
) +
theme(legend.position = "none")
return(p)
}
SCStatePlot <- function(
matrices,
celltype,
show.legend = F
) {
df <- matrices[[celltype]]
df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9
#df$E4 <- NULL
df$E6 <- NULL
df$E7 <- NULL
df$E8 <- NULL
df$E9 <- NULL
colnames(df)[[4]] <- "NA"
df$E1 <- df$E1 + df$E2
df$E2 <- NULL
df$E3 <- df$E3 + df$E5
df$E5 <- NULL
df$E11 <- df$E11 + df$E12
df$E12 <- NULL
#colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter")
#df <- df[, c(1, 8, 10)]
#df <- df / rowSums(df)
df <- melt(as.matrix(df))
p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) +
geom_bar(position="stack", stat="identity", width = 1) +
#scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x=element_blank(),
axis.line = element_line(colour = "black")) +
ylab(label = "State Probabilty")
if (!show.legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
StatePlot <- function(
base.path,
region,
celltype,
show.legend = F
) {
chr.name <- region@seqinfo@seqnames
#celltype <- gsub(" ", "_", celltype)
file.name <- paste0(base.path, celltype, "_12_", chr.name, "_posterior.txt")
print(file.exists(file.name))
df <- read.table(file.name, skip = 1, header = T)
start <- region@ranges@start
end <- region@ranges@start + region@ranges@width
start <- start %/% 200
end <- end %/% 200
df <- df[start:end, ]
#df$E1 <- df$E1 + df$E2 + df$E3
#df$E4 <- df$E4 + df$E6 + df$E7
df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9
#df$E4 <- NULL
df$E6 <- NULL
df$E7 <- NULL
df$E8 <- NULL
df$E9 <- NULL
colnames(df)[[4]] <- "NA"
df$E1 <- df$E1 + df$E2
df$E2 <- NULL
df$E3 <- df$E3 + df$E5
df$E5 <- NULL
df$E11 <- df$E11 + df$E12
df$E12 <- NULL
#colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter")
#df <- df[, c(1, 8, 10)]
#df <- df / rowSums(df)
df <- melt(as.matrix(df))
p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) +
geom_bar(position="stack", stat="identity", width = 1) +
#scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x=element_blank(),
axis.line = element_line(colour = "black")) +
ylab(label = "State Probabilty")
if (!show.legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
GetCoverage <- function(
object,
region,
group.by,
window = 500,
extend.upstream = 0,
extend.downstream = 0,
idents = NULL,
max.downsample = 3000,
downsample.rate = 0.1
) {
cells <- colnames(x = object)
assay <- DefaultAssay(object = object)
if (!is.null(x = group.by)) {
Idents(object = object) <- group.by
}
if (!is.null(x = idents)) {
ident.cells <- WhichCells(object = object, idents = idents)
cells <- intersect(x = cells, y = ident.cells)
}
region <- Signac:::FindRegion(
object = object,
region = region,
assay = assay
)
reads.per.group <- AverageCounts(
object = object,
group.by = group.by,
verbose = FALSE
)
cells.per.group <- CellsPerGroup(
object = object,
group.by = group.by
)
cutmat <- Signac:::CutMatrix(
object = object,
region = region,
assay = assay,
cells = cells,
verbose = FALSE
)
colnames(cutmat) <- start(x = region):end(x = region)
group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group)
scale.factor <- median(x = group.scale.factors)
obj.groups <- Signac:::GetGroups(
object = object,
group.by = group.by,
idents = idents
)
window.size <- width(x = region)
levels.use <- levels(x = obj.groups)
coverages <- Signac:::ApplyMatrixByGroup(
mat = cutmat,
fun = colSums,
groups = obj.groups,
group.scale.factors = group.scale.factors,
scale.factor = scale.factor,
normalize = TRUE
)
if (!is.na(x = window)) {
coverages <- group_by(.data = coverages, group)
coverages <- mutate(.data = coverages, coverage = roll_sum(
x = norm.value, n = window, fill = NA, align = "center"
))
coverages <- ungroup(x = coverages)
} else {
coverages$coverage <- coverages$norm.value
}
chromosome <- as.character(x = seqnames(x = region))
start.pos <- start(x = region)
end.pos <- end(x = region)
coverages <- coverages[!is.na(x = coverages$coverage), ]
coverages <- group_by(.data = coverages, group)
sampling <- min(max.downsample, window.size * downsample.rate)
coverages <- slice_sample(.data = coverages, n = sampling)
# restore factor levels
if (!is.null(x = levels.use)) {
colors_all <- hue_pal()(length(x = levels.use))
names(x = colors_all) <- levels.use
coverages$group <- factor(x = coverages$group, levels = levels.use)
}
coverages
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment