Skip to content

Instantly share code, notes, and snippets.

@ATpoint
Created July 7, 2023 12:22
Show Gist options
  • Save ATpoint/5e2e66c7c4eb1bfb06a10522d001522f to your computer and use it in GitHub Desktop.
Save ATpoint/5e2e66c7c4eb1bfb06a10522d001522f to your computer and use it in GitHub Desktop.
Get REACTOME terms, translate to mouse
# Retrieve REACTOME terms for human directly from the website, then map to mouse.
# Pull human to mouse mappings via biomaRt.
library(biomaRt)
library(data.table)
library(magrittr)
library(rtracklayer)
library(tidyverse)
# Raise R's timeout for internet operations to 999 seconds; presumably needed
# because the remote files downloaded further down are large.
options(timeout=999)
# Ensembl release: selects the FTP release directory and the biomaRt archive version.
ensembl_version <- 101
# Reactome release: selects the download directory of the gene-set (GMT) archive.
reactome_version <- 85
# GTF files as gene_id to gene_name lookup
#
# Import an Ensembl GTF and reduce it to one row per gene.
#
# @param gtf_url URL (or path) of a GTF file readable by rtracklayer::import().
# @return data.frame with columns gene_id, gene_name, gene_biotype.
import_gene_table <- function(gtf_url) {
  rtracklayer::import(gtf_url) %>%
    data.frame %>%
    dplyr::filter(type == "gene") %>%
    dplyr::select(gene_id, gene_name, gene_biotype)
}

# The release number appears both in the directory and in the file name, so
# both are derived from `ensembl_version` to keep the URL valid when the
# version is changed.
ensembl_gtf_dir <- paste0("https://ftp.ensembl.org/pub/release-", ensembl_version, "/gtf/")

# Ensembl switched the mouse reference from GRCm38 to GRCm39 with release 103.
mouse_assembly <- if (ensembl_version >= 103) "GRCm39" else "GRCm38"

genes_human <- import_gene_table(
  paste0(ensembl_gtf_dir, "homo_sapiens/Homo_sapiens.GRCh38.", ensembl_version, ".chr.gtf.gz")
)

genes_mouse <- import_gene_table(
  paste0(ensembl_gtf_dir, "mus_musculus/Mus_musculus.", mouse_assembly, ".", ensembl_version, ".chr.gtf.gz")
)
# BioMart connections for human and mouse, pinned to the configured Ensembl release.
mart_human <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  version = ensembl_version
)
mart_mouse <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "mmusculus_gene_ensembl",
  version = ensembl_version
)

# Linked query across both marts: each row pairs a human gene (ID, HGNC symbol)
# with a mouse gene (ID, MGI symbol). Columns are renamed to fixed names and
# duplicate rows are dropped.
human_mouse <- unique(
  magrittr::set_colnames(
    biomaRt::getLDS(
      attributes = c("ensembl_gene_id", "hgnc_symbol"),
      mart = mart_human,
      attributesL = c("ensembl_gene_id", "mgi_symbol"),
      martL = mart_mouse,
      uniqueRows = TRUE
    ),
    c("human_id", "human_name", "mouse_id", "mouse_name")
  )
)
# Get REACTOME
# Download the zipped GMT of the configured Reactome release and parse it into
# a named list (pathway name -> character vector of gene symbols).
url_reactome <- paste0("https://reactome.org/download/", reactome_version, "/ReactomePathways.gmt.zip")
temp <- tempfile(fileext = ".zip")
# Binary mode keeps the zip archive intact regardless of platform.
download.file(url_reactome, temp, mode = "wb")
# Keep a handle on the connection: a connection created inline is only closed,
# not destroyed, by the reader and later triggers a "closing unused connection"
# warning. `finally` also guarantees cleanup if parsing fails.
gmt_con <- unz(temp, "ReactomePathways.gmt")
reactome_human_list <- tryCatch(
  fgsea::gmtPathways(gmt_con),
  finally = {
    close(gmt_con)
    unlink(temp)
  }
)
# as data.frame
# Flatten the pathway list into long format (one row per pathway/gene symbol),
# attach Ensembl gene IDs via the gene symbol, and label each gene as
# "<gene_id>_<gene_name>".
#
# The table is built in one vectorized step instead of one data.frame per
# pathway + rbind: that is much faster and does not fail on a pathway with
# zero genes (data.frame() refuses columns of length 1 and 0). Each list
# element contributes its own genes, even if pathway names were duplicated.
reactome_human <-
  data.frame(
    pathway = as.character(rep(names(reactome_human_list), lengths(reactome_human_list))),
    gene_name = as.character(unlist(reactome_human_list, use.names = FALSE)),
    check.names = FALSE
  ) %>%
  # A symbol can map to several gene IDs and occurs in many pathways.
  dplyr::left_join(x = ., y = genes_human, by = c("gene_name"), relationship = "many-to-many") %>%
  # Drop symbols that are empty or have no match in the Ensembl annotation.
  dplyr::filter(!is.na(gene_name) & gene_name != "" & !is.na(gene_id) & gene_id != "") %>%
  dplyr::mutate(gene = paste(gene_id, gene_name, sep = "_")) %>%
  dplyr::select(pathway, gene) %>%
  unique
# map human to mouse
# Translate every human "<id>_<symbol>" entry into its mouse counterpart(s)
# using the human/mouse pairing table; entries without a complete mouse
# ID + symbol are discarded and duplicates removed.
reactome_mouse <-
  dplyr::left_join(
    x = reactome_human,
    # Build the same "<id>_<symbol>" key on the mapping table to join on.
    y = dplyr::mutate(human_mouse, gene = paste(human_id, human_name, sep = "_")),
    by = "gene",
    relationship = "many-to-many"
  ) %>%
  dplyr::filter(
    !is.na(mouse_id),
    !is.na(mouse_name),
    mouse_id != "",
    mouse_name != ""
  ) %>%
  dplyr::mutate(gene = paste(mouse_id, mouse_name, sep = "_")) %>%
  dplyr::select(pathway, gene) %>%
  unique
# Write each result table to "<object name>.txt.gz" in the working directory
# as a gzip-compressed, tab-separated file with a header and no quoting.
save_that <- c("genes_human", "genes_mouse", "human_mouse", "reactome_human", "reactome_mouse")
invisible(
  lapply(save_that, function(table_name) {
    data.table::fwrite(
      x = get(table_name),
      file = paste0(table_name, ".txt.gz"),
      sep = "\t",
      quote = FALSE,
      col.names = TRUE,
      row.names = FALSE,
      compress = "gzip"
    )
    NULL
  })
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment