Skip to content

Instantly share code, notes, and snippets.

@dfjenkins3
Last active June 7, 2018 17:54
Show Gist options
  • Save dfjenkins3/521d1a010e0cefce434932584939120d to your computer and use it in GitHub Desktop.
Save dfjenkins3/521d1a010e0cefce434932584939120d to your computer and use it in GitHub Desktop.
Convert Cell Ranger outs Directory to a SingleCelltkExperiment
#' Convert Cell Ranger outs Directory to a SingleCelltkExperiment
#'
#' This function creates a SingleCelltkExperiment object from a Cell Ranger
#' output directory. The filtered count matrix, pca, tsne, and clustering
#' results are stored in the object. This function requires the cellrangerRkit
#' package which is not on CRAN or Bioconductor. Install it using the directions
#' available here:
#'
#' https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit
#'
#' The following layout is read below \code{path}:
#' \itemize{
#'   \item \code{filtered_gene_bc_matrices/<subdir>/} containing
#'     \code{barcodes.tsv}, \code{genes.tsv} and \code{matrix.mtx}. Exactly one
#'     sub directory is expected (required).
#'   \item \code{metrics_summary.csv} (optional, passed on when present).
#'   \item \code{analysis/pca/<*components*>/projection.csv} and
#'     \code{variance.csv} (optional), stored as the \code{"PCA_counts"}
#'     reduced dimension and as the PCA variances.
#'   \item \code{analysis/tsne/<*components*>/projection.csv} (optional),
#'     stored as the \code{"TSNE_counts"} reduced dimension.
#'   \item \code{analysis/clustering/<method>/clusters.csv} (optional), the
#'     \code{Cluster} column of each file is stored as a factor in a
#'     \code{colData} column named after \code{<method>}.
#' }
#'
#' @param path The path to the Cell Ranger outs directory. Defaults to the cwd.
#'
#' @return a SingleCelltkExperiment object to use in the Single Cell Toolkit.
#' raw data, PCA, tSNE, and clustering results are extracted and stored in the
#' object.
#'
#' @export
#'
cellranger_dir_to_sctke <- function(path = ".") {
  if (!requireNamespace("cellrangerRkit", quietly = TRUE)) {
    stop("cellrangerRkit needed for this function to work. Please install it. ",
         "Directions are available here:\nhttps://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit",
         call. = FALSE)
  }

  # Pick the first "components" entry inside an analysis sub directory.
  # Returns NULL (with a warning) when there is none, so the caller can skip
  # that optional section instead of failing on a path containing "NA".
  first_components_dir <- function(analysis_dir, label) {
    candidates <- list.files(path = analysis_dir, pattern = "components")
    if (length(candidates) == 0) {
      warning("No ", label, " components directory found in ", analysis_dir,
              ". Skipping.", call. = FALSE)
      return(NULL)
    }
    if (length(candidates) > 1) {
      warning("More than one ", label,
              " components file found. Will consider first only.",
              call. = FALSE)
    }
    candidates[1]
  }

  # The filtered matrix is mandatory: exactly one sub directory is expected.
  matrices_root <- file.path(path, "filtered_gene_bc_matrices")
  mat_dir <- list.files(path = matrices_root)
  if (length(mat_dir) != 1) {
    stop("Error while parsing filtered_gene_bc_matrices, expected 1 sub directory.",
         call. = FALSE)
  }
  barcode_file <- file.path(matrices_root, mat_dir, "barcodes.tsv")
  genes_file <- file.path(matrices_root, mat_dir, "genes.tsv")
  matrix_file <- file.path(matrices_root, mat_dir, "matrix.mtx")

  # The metrics summary is optional; NULL is passed on when it is absent.
  metrics <- file.path(path, "metrics_summary.csv")
  if (!file.exists(metrics)) {
    metrics <- NULL
  }

  incrdata <- cellrangerRkit::load_cellranger_matrix_from_files(
    mat_fn = matrix_file,
    gene_fn = genes_file,
    barcode_fn = barcode_file,
    summary_fn = metrics
  )
  inscedata <- singleCellTK::createSCE(
    assayFile = as.matrix(Biobase::exprs(incrdata)),
    annotFile = Biobase::pData(incrdata),
    featureFile = Biobase::fData(incrdata),
    inputDataFrames = TRUE
  )
  rm(incrdata)

  # NOTE(review): reducedDim<-, pcaVariances<-, DataFrame and colData<- are
  # called unqualified below, so the packages providing them must be attached
  # by the caller -- confirm which namespaces export them before qualifying.

  # if there is a pca folder
  pca_root <- file.path(path, "analysis", "pca")
  if (dir.exists(pca_root)) {
    pca_dir <- first_components_dir(pca_root, "pca")
    if (!is.null(pca_dir)) {
      pca_p <- read.csv(file.path(pca_root, pca_dir, "projection.csv"),
                        row.names = 1)
      # read.csv turns "PC-1" style headers into "PC.1"; drop the dots.
      colnames(pca_p) <- gsub('\\.', '', colnames(pca_p))
      pca_v <- read.csv(file.path(pca_root, pca_dir, "variance.csv"),
                        row.names = 1)
      rownames(pca_v) <- colnames(pca_p)
      reducedDim(inscedata, "PCA_counts") <- as.matrix(pca_p)
      pcaVariances(inscedata) <- DataFrame(pca_v)
    }
  }

  # if there is a tsne folder
  tsne_root <- file.path(path, "analysis", "tsne")
  if (dir.exists(tsne_root)) {
    tsne_dir <- first_components_dir(tsne_root, "tsne")
    if (!is.null(tsne_dir)) {
      tsne <- read.csv(file.path(tsne_root, tsne_dir, "projection.csv"),
                       row.names = 1)
      colnames(tsne) <- c("X1", "X2")
      reducedDim(inscedata, "TSNE_counts") <- as.matrix(tsne)
    }
  }

  # if there is a clustering folder
  clustering_root <- file.path(path, "analysis", "clustering")
  if (dir.exists(clustering_root)) {
    # One colData column per clustering method (sub directory name).
    # Assumes clusters.csv rows are in the same order as the cells in the
    # object -- TODO confirm; no matching on barcode is performed here.
    for (clustmethod in list.files(clustering_root)) {
      clusterin <- read.csv(
        file.path(clustering_root, clustmethod, "clusters.csv"),
        row.names = 1
      )
      colData(inscedata)[, clustmethod] <- as.factor(clusterin[["Cluster"]])
    }
  }

  inscedata
}
@dfjenkins3
Copy link
Author

This is not in our package because of the warning caused by the special installation of cellrangerRkit. This will change if it is released on CRAN or Bioconductor.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment