Created
June 15, 2017 09:55
-
-
Save kieranrcampbell/310cbf61c65242cda7322ba5246ec56e to your computer and use it in GitHub Desktop.
Brief example of scran's method for "Accounting for technical noise in single-cell RNA-seq experiments"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(stringr) | |
library(ggplot2) | |
library(scran) | |
library(scater) | |
library(dplyr) | |
# Step 1: do the overdispersion analysis | |
sc2 <- sce[rowMeans(round(exprs(sce)) > 0) > 0, ] # Keep only genes that are expressed | |
is_ercc <- grepl("NA_ERCC", featureNames(sc2)) # Your ERCCs might be named differently to mine! | |
count_mat <- counts(sc2) | |
tc <- technicalCV2(count_mat, is_ercc) | |
tc <- mutate(tc, feature = rownames(tc)) %>% tbl_df() | |
tc <- mutate(tc, is_overdisp = str_to_title(as.character(FDR < 0.05)), | |
gene = fData(sc2)$ensembl_gene_id) | |
ggplot(mapping = aes(x = mean, y = cv2)) + | |
geom_point(data = filter(tc, !is_ercc), aes(color = is_overdisp), | |
size = 1, alpha = 0.5) + | |
scale_x_log10() + geom_line(data = tc, mapping = aes(y = trend), color = 'black') + | |
scale_y_log10(limits = c(NA, max(tc$cv2, na.rm = TRUE))) + | |
cowplot::theme_cowplot() + | |
scale_color_brewer(palette = "Set2", name = "Overdispersed") + | |
#geom_point(data = filter(tc, is_ercc), color = "black", | |
# size = 1, alpha = 0.5) + | |
xlab(expression(mu)) + ylab(expression(CV^2)) + | |
theme_cowplot(font_size = 11) + | |
theme(legend.position = "top") | |
# Step 2: GO analysis on overdispersed genes | |
#' @param genes The genes "differentially expressed" or in the condition | |
#' @param all_genes The background gene set | |
go_analysis <- function(genes, all_genes, test_cats = "GO:BP") { | |
library(goseq) | |
all_genes <- all_genes[!is.na(all_genes)] | |
all_genes <- unique(all_genes) # remove duplicates | |
lgl <- 1 * (all_genes %in% genes) | |
names(lgl) <- all_genes | |
pwf <- nullp(lgl, "hg19", "ensGene") | |
go <- goseq(pwf, "hg19", "ensGene", test.cats = test_cats) | |
return(go) | |
} | |
all_genes <- tc$gene | |
od_genes <- filter(tc, is_overdisp == "True") %>% .$gene | |
go <- go_analysis(od_genes, all_genes) | |
go$q_value <- p.adjust(go$over_represented_pvalue, method = "BH") | |
# Step 3: A publication-quality plot of top 6 go terms | |
go_top <- head(go, n = 6) | |
go_top$term <- str_to_title(go_top$term) | |
terms_sorted <- arrange(go_top, desc(q_value)) %>% .$term | |
go_top$term <- factor(go_top$term, levels = terms_sorted) | |
ggplot(go_top, aes(x = term, y = -log10(q_value))) + | |
geom_point() + | |
coord_flip() + | |
theme(axis.title.y = element_blank(), legend.position = "none") + | |
ylab(expression(paste("-", log[10], " q-value"))) + | |
theme(#legend.position = c(-.2, -.14), legend.direction = "horizontal", | |
legend.position = c(0, 0), legend.direction = "horizontal", | |
axis.text = element_text(size = 8), | |
axis.title = element_text(size = 10), | |
legend.title = element_text(size = 11), | |
legend.text = element_text(size = 10)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment