dfjenkins3 · June 7, 2018 17:54 · dfjenkins3 · Oct 23, 2017
diff --git a/cellranger_dir_to_sctke.R b/cellranger_dir_to_sctke.R
 #' Convert Cell Ranger outs Directory to a SingleCelltkExperiment
 #'
 #' Builds a SingleCelltkExperiment object from a Cell Ranger \code{outs}
 #' directory. The filtered count matrix is always loaded; PCA, tSNE and
 #' clustering results are added when the matching folders exist under
 #' \code{analysis}. This function requires the cellrangerRkit package which is
 #' not on CRAN or Bioconductor. Install it using the directions available here:
 #'
 #' https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit
 #'
 #' @param path The path to the Cell Ranger outs directory. Defaults to the cwd.
 #'
 #' @return a SingleCelltkExperiment object to use in the Single Cell Toolkit.
 #' The counts are read from \code{filtered_gene_bc_matrices}. When available,
 #' the PCA projection is stored as the reduced dimension \code{"PCA_counts"}
 #' (with its variances in \code{pcaVariances}), the tSNE projection as
 #' \code{"TSNE_counts"}, and one factor column per clustering folder is added
 #' to \code{colData}.
 #'
 #' @export
 #'
 cellranger_dir_to_sctke <- function(path = ".") {
   if (!requireNamespace("cellrangerRkit", quietly = TRUE)) {
     stop("cellrangerRkit needed for this function to work. Please install it. ",
          "Directions are available here:\nhttps://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit",
          call. = FALSE)
   }

   # Resolve the "components" entry inside analysis/<kind>.
   # Returns its full path, or NULL (with a warning) when nothing matches, so
   # the caller can skip the section instead of building a path containing NA.
   find_components_dir <- function(kind) {
     kind_root <- file.path(path, "analysis", kind)
     found <- list.files(pattern = "components", path = kind_root)
     if (length(found) == 0) {
       warning("No ", kind, " components folder found in ", kind_root,
               ". Skipping.", call. = FALSE)
       return(NULL)
     }
     if (length(found) > 1) {
       warning("More than one ", kind,
               " components file found. Will consider first only.",
               call. = FALSE)
     }
     file.path(kind_root, found[1])
   }

   # The filtered matrices folder must hold exactly one sub directory.
   matrices_root <- file.path(path, "filtered_gene_bc_matrices")
   mat_dir <- list.files(path = matrices_root)
   if (length(mat_dir) != 1) {
     stop("Error while parsing filtered_gene_bc_matrices, expected 1 sub directory.")
   }
   barcode_file <- file.path(matrices_root, mat_dir, "barcodes.tsv")
   genes_file <- file.path(matrices_root, mat_dir, "genes.tsv")
   matrix_file <- file.path(matrices_root, mat_dir, "matrix.mtx")

   # The metrics summary is optional; NULL is passed on when it is missing.
   metrics <- file.path(path, "metrics_summary.csv")
   if (!file.exists(metrics)) {
     metrics <- NULL
   }

   incrdata <- cellrangerRkit::load_cellranger_matrix_from_files(
     mat_fn = matrix_file,
     gene_fn = genes_file,
     barcode_fn = barcode_file,
     summary_fn = metrics
   )
   inscedata <- singleCellTK::createSCE(
     assayFile = as.matrix(Biobase::exprs(incrdata)),
     annotFile = Biobase::pData(incrdata),
     featureFile = Biobase::fData(incrdata),
     inputDataFrames = TRUE
   )
   # The Cell Ranger object is no longer needed once the SCE has been built.
   rm(incrdata)

   # if there is a pca folder
   if (dir.exists(file.path(path, "analysis", "pca"))) {
     pca_dir <- find_components_dir("pca")
     if (!is.null(pca_dir)) {
       pca_p <- read.csv(file.path(pca_dir, "projection.csv"), row.names = 1)
       # Drop every "." from the component names.
       colnames(pca_p) <- gsub("\\.", "", colnames(pca_p))
       pca_v <- read.csv(file.path(pca_dir, "variance.csv"), row.names = 1)
       # Label the variance rows with the same component names.
       rownames(pca_v) <- colnames(pca_p)
       reducedDim(inscedata, "PCA_counts") <- as.matrix(pca_p)
       pcaVariances(inscedata) <- DataFrame(pca_v)
     }
   }

   # if there is a tsne folder
   if (dir.exists(file.path(path, "analysis", "tsne"))) {
     tsne_dir <- find_components_dir("tsne")
     if (!is.null(tsne_dir)) {
       tsne <- read.csv(file.path(tsne_dir, "projection.csv"), row.names = 1)
       colnames(tsne) <- c("X1", "X2")
       reducedDim(inscedata, "TSNE_counts") <- as.matrix(tsne)
     }
   }

   # if there is a clustering folder
   if (dir.exists(file.path(path, "analysis", "clustering"))) {
     # Each entry of the clustering folder becomes one colData column.
     for (clustmethod in list.files(file.path(path, "analysis", "clustering"))) {
       clusterin <- read.csv(
         file.path(path, "analysis", "clustering", clustmethod, "clusters.csv"),
         row.names = 1
       )
       # NOTE(review): assigned positionally - assumes clusters.csv lists the
       # barcodes in the same order as the cells of the object; confirm.
       colData(inscedata)[, clustmethod] <- as.factor(clusterin$Cluster)
     }
   }

   return(inscedata)
 }
	#' Convert Cell Ranger outs Directory to a SingleCelltkExperiment
	#'
	#' Builds a SingleCelltkExperiment object from a Cell Ranger \code{outs}
	#' directory. The filtered count matrix is always loaded; PCA, tSNE and
	#' clustering results are added when the matching folders exist under
	#' \code{analysis}. This function requires the cellrangerRkit package which is
	#' not on CRAN or Bioconductor. Install it using the directions available here:
	#'
	#' https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit
	#'
	#' @param path The path to the Cell Ranger outs directory. Defaults to the cwd.
	#'
	#' @return a SingleCelltkExperiment object to use in the Single Cell Toolkit.
	#' The counts are read from \code{filtered_gene_bc_matrices}. When available,
	#' the PCA projection is stored as the reduced dimension \code{"PCA_counts"}
	#' (with its variances in \code{pcaVariances}), the tSNE projection as
	#' \code{"TSNE_counts"}, and one factor column per clustering folder is added
	#' to \code{colData}.
	#'
	#' @export
	#'
	cellranger_dir_to_sctke <- function(path = ".") {
	  if (!requireNamespace("cellrangerRkit", quietly = TRUE)) {
	    stop("cellrangerRkit needed for this function to work. Please install it. ",
	         "Directions are available here:\nhttps://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/rkit",
	         call. = FALSE)
	  }

	  # Resolve the "components" entry inside analysis/<kind>.
	  # Returns its full path, or NULL (with a warning) when nothing matches, so
	  # the caller can skip the section instead of building a path containing NA.
	  find_components_dir <- function(kind) {
	    kind_root <- file.path(path, "analysis", kind)
	    found <- list.files(pattern = "components", path = kind_root)
	    if (length(found) == 0) {
	      warning("No ", kind, " components folder found in ", kind_root,
	              ". Skipping.", call. = FALSE)
	      return(NULL)
	    }
	    if (length(found) > 1) {
	      warning("More than one ", kind,
	              " components file found. Will consider first only.",
	              call. = FALSE)
	    }
	    file.path(kind_root, found[1])
	  }

	  # The filtered matrices folder must hold exactly one sub directory.
	  matrices_root <- file.path(path, "filtered_gene_bc_matrices")
	  mat_dir <- list.files(path = matrices_root)
	  if (length(mat_dir) != 1) {
	    stop("Error while parsing filtered_gene_bc_matrices, expected 1 sub directory.")
	  }
	  barcode_file <- file.path(matrices_root, mat_dir, "barcodes.tsv")
	  genes_file <- file.path(matrices_root, mat_dir, "genes.tsv")
	  matrix_file <- file.path(matrices_root, mat_dir, "matrix.mtx")

	  # The metrics summary is optional; NULL is passed on when it is missing.
	  metrics <- file.path(path, "metrics_summary.csv")
	  if (!file.exists(metrics)) {
	    metrics <- NULL
	  }

	  incrdata <- cellrangerRkit::load_cellranger_matrix_from_files(
	    mat_fn = matrix_file,
	    gene_fn = genes_file,
	    barcode_fn = barcode_file,
	    summary_fn = metrics
	  )
	  inscedata <- singleCellTK::createSCE(
	    assayFile = as.matrix(Biobase::exprs(incrdata)),
	    annotFile = Biobase::pData(incrdata),
	    featureFile = Biobase::fData(incrdata),
	    inputDataFrames = TRUE
	  )
	  # The Cell Ranger object is no longer needed once the SCE has been built.
	  rm(incrdata)

	  # if there is a pca folder
	  if (dir.exists(file.path(path, "analysis", "pca"))) {
	    pca_dir <- find_components_dir("pca")
	    if (!is.null(pca_dir)) {
	      pca_p <- read.csv(file.path(pca_dir, "projection.csv"), row.names = 1)
	      # Drop every "." from the component names.
	      colnames(pca_p) <- gsub("\\.", "", colnames(pca_p))
	      pca_v <- read.csv(file.path(pca_dir, "variance.csv"), row.names = 1)
	      # Label the variance rows with the same component names.
	      rownames(pca_v) <- colnames(pca_p)
	      reducedDim(inscedata, "PCA_counts") <- as.matrix(pca_p)
	      pcaVariances(inscedata) <- DataFrame(pca_v)
	    }
	  }

	  # if there is a tsne folder
	  if (dir.exists(file.path(path, "analysis", "tsne"))) {
	    tsne_dir <- find_components_dir("tsne")
	    if (!is.null(tsne_dir)) {
	      tsne <- read.csv(file.path(tsne_dir, "projection.csv"), row.names = 1)
	      colnames(tsne) <- c("X1", "X2")
	      reducedDim(inscedata, "TSNE_counts") <- as.matrix(tsne)
	    }
	  }

	  # if there is a clustering folder
	  if (dir.exists(file.path(path, "analysis", "clustering"))) {
	    # Each entry of the clustering folder becomes one colData column.
	    for (clustmethod in list.files(file.path(path, "analysis", "clustering"))) {
	      clusterin <- read.csv(
	        file.path(path, "analysis", "clustering", clustmethod, "clusters.csv"),
	        row.names = 1
	      )
	      # NOTE(review): assigned positionally - assumes clusters.csv lists the
	      # barcodes in the same order as the cells of the object; confirm.
	      colData(inscedata)[, clustmethod] <- as.factor(clusterin$Cluster)
	    }
	  }

	  return(inscedata)
	}