daskelly · November 30, 2018 17:57
diff --git a/find_specific_markers_parallel.R b/find_specific_markers_parallel.R
 library(tidyverse)
 library(Seurat)
 library(doParallel)
 # Find specific markers for each cell type

 n.cores <- 10
 registerDoParallel(cores=n.cores)

 celltypes <-   # list of cell types (unique ident labels of Seurat object)
 obj <-         # Seurat object

 # First we do all pairwise comparisons and retain any markers that 
 # are even somewhat higher in the cell type of interest
 specific_markers_par <- foreach(i=1:length(celltypes), .combine=rbind) %dopar% {
    celltype1 <- celltypes[i]
    other_types <- setdiff(celltypes, celltype1)
    celltype_markers <- NULL
    for (celltype2 in other_types) {
        message(paste0(celltype1, " * ", celltype2))
        markers <- FindMarkers(obj, ident.1=celltype1, ident.2=celltype2,
            only.pos=TRUE, min.diff.pct=0.05, test.use="roc",
            max.cells.per.ident=2000) %>%
            rownames_to_column("gene") %>%
            mutate(ident.2=celltype2)
        celltype_markers <- rbind(celltype_markers, markers)
    }
    celltype_markers$ident.1 <- celltype1
    celltype_markers
 }

 # In specific_markers data.frame, ident.1 is which cell pop the marker is tagging
 # and ident.2 is which other cell pop we are contrasting with
 #
 # We want markers that are considerably higher in ident.1 than in any other pop.
 # As a basic filter we require any marker for ident.1 to be expressed in <50% of 
 # cells of each of the other cell types.
 # 
 # Here we will filter to get the top 10 markers for each cell type
 topN <- 10
 npop <- length(celltypes)
 final_markers <- mutate(specific_markers_par, pct_diff=pct.1 - pct.2) %>%
    group_by(ident.1, gene) %>%
    filter(n() == npop - 1, all(pct.2 < 0.6)) %>%
    summarize(median_AUC=median(myAUC), pct.1=mean(pct.1), pct.2=mean(pct.2)) %>%
    mutate(cluster=ident.1) %>%
    filter(median_AUC > 0.65) %>%
    group_by(cluster) %>%
    arrange(desc(median_AUC)) %>%
    do(head(., topN))
	library(tidyverse)
	library(Seurat)
	library(doParallel)
	# Find specific markers for each cell type

	n.cores <- 10
	registerDoParallel(cores=n.cores)

	celltypes <- # list of cell types (unique ident labels of Seurat object)
	obj <- # Seurat object

	# First we do all pairwise comparisons and retain any markers that
	# are even somewhat higher in the cell type of interest
	specific_markers_par <- foreach(i=1:length(celltypes), .combine=rbind) %dopar% {
	celltype1 <- celltypes[i]
	other_types <- setdiff(celltypes, celltype1)
	celltype_markers <- NULL
	for (celltype2 in other_types) {
	message(paste0(celltype1, " * ", celltype2))
	markers <- FindMarkers(obj, ident.1=celltype1, ident.2=celltype2,
	only.pos=TRUE, min.diff.pct=0.05, test.use="roc",
	max.cells.per.ident=2000) %>%
	rownames_to_column("gene") %>%
	mutate(ident.2=celltype2)
	celltype_markers <- rbind(celltype_markers, markers)
	}
	celltype_markers$ident.1 <- celltype1
	celltype_markers
	}

	# In specific_markers data.frame, ident.1 is which cell pop the marker is tagging
	# and ident.2 is which other cell pop we are contrasting with
	#
	# We want markers that are considerably higher in ident.1 than in any other pop.
	# As a basic filter we require any marker for ident.1 to be expressed in <50% of
	# cells of each of the other cell types.
	#
	# Here we will filter to get the top 10 markers for each cell type
	topN <- 10
	npop <- length(celltypes)
	final_markers <- mutate(specific_markers_par, pct_diff=pct.1 - pct.2) %>%
	group_by(ident.1, gene) %>%
	filter(n() == npop - 1, all(pct.2 < 0.6)) %>%
	summarize(median_AUC=median(myAUC), pct.1=mean(pct.1), pct.2=mean(pct.2)) %>%
	mutate(cluster=ident.1) %>%
	filter(median_AUC > 0.65) %>%
	group_by(cluster) %>%
	arrange(desc(median_AUC)) %>%
	do(head(., topN))