Last active
June 7, 2018 17:54
-
-
Save dfjenkins3/521d1a010e0cefce434932584939120d to your computer and use it in GitHub Desktop.
Convert Cell Ranger outs Directory to a SingleCelltkExperiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Convert Cell Ranger outs Directory to a SingleCelltkExperiment
#'
#' This function creates a SingleCelltkExperiment object from a Cell Ranger
#' output directory. The filtered count matrix, pca, tsne, and clustering
#' results are stored in the object. This function requires the cellrangerRkit
#' package which is not on CRAN or Bioconductor. Install it using the directions
#' available here:
#'
#' https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit
#'
#' The following layout is read, relative to \code{path}:
#' \itemize{
#'   \item \code{filtered_gene_bc_matrices/<subdir>/} containing
#'     \code{barcodes.tsv}, \code{genes.tsv} and \code{matrix.mtx}. Exactly one
#'     sub directory must be present (required).
#'   \item \code{metrics_summary.csv} (optional, passed on to cellrangerRkit).
#'   \item \code{analysis/pca/<*components*>/projection.csv} and
#'     \code{variance.csv} (optional).
#'   \item \code{analysis/tsne/<*components*>/projection.csv} (optional).
#'   \item \code{analysis/clustering/<method>/clusters.csv} (optional, one
#'     annotation column is added per method directory).
#' }
#'
#' @param path The path to the Cell Ranger outs directory. Defaults to the cwd.
#'
#' @return a SingleCelltkExperiment object to use in the Single Cell Toolkit.
#' raw data, PCA, tSNE, and clustering results are extracted and stored in the
#' object. PCA projections are stored as the reduced dimension
#' \code{"PCA_counts"}, tSNE projections as \code{"TSNE_counts"}, and each
#' clustering result as a factor column named after its directory.
#'
#' @export
#'
cellranger_dir_to_sctke <- function(path = ".") {
  if (!requireNamespace("cellrangerRkit", quietly = TRUE)) {
    stop("cellrangerRkit needed for this function to work. Please install it. ",
         "Directions are available here:\nhttps://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit",
         call. = FALSE)
  }

  # Locate the first "components" sub directory below `root`. Returns NULL when
  # `root` does not exist or holds no matching entry, so callers can skip the
  # optional result instead of failing on a ".../NA/projection.csv" path.
  first_components_dir <- function(root, label) {
    if (!dir.exists(root)) {
      return(NULL)
    }
    found <- list.files(path = root, pattern = "components")
    if (length(found) == 0) {
      warning("No ", label, " components directory found in ", root,
              ". Skipping ", label, " results.")
      return(NULL)
    }
    if (length(found) > 1) {
      warning("More than one ", label,
              " components file found. Will consider first only.")
    }
    file.path(root, found[1])
  }

  # The filtered matrix lives in exactly one sub directory; list it once and
  # reuse the result.
  mat_root <- file.path(path, "filtered_gene_bc_matrices")
  mat_dir <- list.files(path = mat_root)
  if (length(mat_dir) != 1) {
    stop("Error while parsing filtered_gene_bc_matrices, expected 1 sub directory.")
  }

  # The metrics summary is optional; cellrangerRkit is given NULL when absent.
  metrics <- file.path(path, "metrics_summary.csv")
  if (!file.exists(metrics)) {
    metrics <- NULL
  }

  incrdata <- cellrangerRkit::load_cellranger_matrix_from_files(
    mat_fn = file.path(mat_root, mat_dir, "matrix.mtx"),
    gene_fn = file.path(mat_root, mat_dir, "genes.tsv"),
    barcode_fn = file.path(mat_root, mat_dir, "barcodes.tsv"),
    summary_fn = metrics
  )
  inscedata <- singleCellTK::createSCE(
    assayFile = as.matrix(Biobase::exprs(incrdata)),
    annotFile = Biobase::pData(incrdata),
    featureFile = Biobase::fData(incrdata),
    inputDataFrames = TRUE
  )
  # Drop the intermediate object; only the converted one is needed from here.
  rm(incrdata)

  # NOTE(review): reducedDim<-, pcaVariances<-, DataFrame and colData<- are used
  # unqualified below, as in the original; they must be attached or imported by
  # the enclosing package -- confirm against the NAMESPACE.

  # if there is a pca folder
  pca_dir <- first_components_dir(file.path(path, "analysis", "pca"), "pca")
  if (!is.null(pca_dir)) {
    pca_p <- utils::read.csv(file.path(pca_dir, "projection.csv"),
                             row.names = 1)
    # Strip the dots from the column names as read by read.csv.
    colnames(pca_p) <- gsub("\\.", "", colnames(pca_p))
    pca_v <- utils::read.csv(file.path(pca_dir, "variance.csv"),
                             row.names = 1)
    # Label the variance rows with the projection's component names.
    rownames(pca_v) <- colnames(pca_p)
    reducedDim(inscedata, "PCA_counts") <- as.matrix(pca_p)
    pcaVariances(inscedata) <- DataFrame(pca_v)
  }

  # if there is a tsne folder
  tsne_dir <- first_components_dir(file.path(path, "analysis", "tsne"), "tsne")
  if (!is.null(tsne_dir)) {
    tsne <- utils::read.csv(file.path(tsne_dir, "projection.csv"),
                            row.names = 1)
    # Assumes a two-column projection; other widths error here -- TODO confirm.
    colnames(tsne) <- c("X1", "X2")
    reducedDim(inscedata, "TSNE_counts") <- as.matrix(tsne)
  }

  # if there is a clustering folder
  clust_root <- file.path(path, "analysis", "clustering")
  if (dir.exists(clust_root)) {
    for (clustmethod in list.files(clust_root)) {
      clusters_file <- file.path(clust_root, clustmethod, "clusters.csv")
      if (!file.exists(clusters_file)) {
        # A stray entry should not abort the whole conversion.
        warning("No clusters.csv found for clustering entry '", clustmethod,
                "'. Skipping.")
        next
      }
      clusterin <- utils::read.csv(clusters_file, row.names = 1)
      # Values are assigned in file order; presumably rows match the cell order
      # of the filtered matrix -- verify if the files may be reordered.
      colData(inscedata)[, clustmethod] <- as.factor(clusterin$Cluster)
    }
  }

  inscedata
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This function is not included in our package because the special installation required by cellrangerRkit causes a warning. This will change if cellrangerRkit is released on CRAN or Bioconductor.