Created
April 6, 2022 23:10
-
-
Save tomsing1/a6d0292dc9311167b233661bb99409a0 to your computer and use it in GitHub Desktop.
Extracting gene sets from the Bioconductor reactome.db R package's SQLite backend with dplyr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Retrieve Reactome sets of Entrez identifiers for a selected species
#'
#' Queries the SQLite database behind the `reactome.db` Bioconductor package.
#' Pathways whose name starts with `"<species>: "` are selected from the
#' `pathway2name` table and joined to the `pathway2gene` table on `DB_ID`.
#'
#' @param species Scalar character, the species of interest, e.g.
#'   `Homo sapiens`. Must be one of the choices listed in the signature; when
#'   omitted, the first choice is used.
#' @param verbose Scalar flag. If `TRUE`, the `DBSCHEMAVERSION` entry of the
#'   database's `metadata` table is reported via `message()`. Default: `FALSE`.
#' @importFrom dplyr tbl filter inner_join select collect mutate pull
#'   check_dbplyr
#' @importFrom glue glue_sql glue
#' @importFrom checkmate assert_flag
#' @export
#' @return A named list of Entrez identifiers, one element per pathway. The
#'   names are the pathway names with the leading `"<species>: "` removed.
#' @examples
#' ReactomeSets("Mycobacterium tuberculosis")
ReactomeSets <- function(
    species = c(
      "Bos taurus", "Caenorhabditis elegans", "Canis familiaris",
      "Danio rerio", "Dictyostelium discoideum",
      "Drosophila melanogaster", "Gallus gallus", "Homo sapiens",
      "Mus musculus", "Mycobacterium tuberculosis",
      "Plasmodium falciparum", "Rattus norvegicus",
      "Saccharomyces cerevisiae", "Schizosaccharomyces pombe",
      "Sus scrofa", "Xenopus tropicalis"),
    verbose = FALSE) {
  # Validate arguments first so that bad input fails before any database work.
  species <- match.arg(species)
  checkmate::assert_flag(verbose)

  if (!requireNamespace("reactome.db", quietly = TRUE)) {
    stop(paste("This function requires the suggested 'reactome.db'",
               "Bioconductor package. Please install it first."),
         call. = FALSE)
  }
  dplyr::check_dbplyr()
  con <- reactome.db::reactome_dbconn()

  if (verbose) {
    release <- dplyr::tbl(con, "metadata")
    release <- dplyr::filter(release, name == "DBSCHEMAVERSION")
    release <- dplyr::pull(dplyr::collect(release), "value")
    message(
      glue::glue("Retrieving gene sets from reactome release {release}.\n")
    )
  }

  # Restrict to the pathways of the requested species inside the database.
  pathways <- dplyr::tbl(con, "pathway2name")
  pathways <- dplyr::filter(
    pathways,
    glue::glue_sql("path_name LIKE {paste0(species, ': %')}", .con = con)
  )

  # An inner join keeps only genes of the selected pathways. Unmatched genes
  # would carry a missing pathway name and be dropped by split() anyway, so
  # there is no need to transfer them from the database.
  gene_sets <- dplyr::inner_join(
    pathways,
    dplyr::tbl(con, "pathway2gene"),
    by = "DB_ID"
  )
  gene_sets <- dplyr::select(gene_sets, gene_id, path_name)
  gene_sets <- dplyr::collect(gene_sets)

  # Strip the species prefix as a literal string, not as a regular expression.
  pathway <- sub(paste0(species, ": "), "", gene_sets$path_name, fixed = TRUE)
  split(gene_sets$gene_id, pathway)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment