Created
July 7, 2023 12:22
-
-
Save ATpoint/5e2e66c7c4eb1bfb06a10522d001522f to your computer and use it in GitHub Desktop.
Get REACTOME terms, translate to mouse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Retrieve the human REACTOME terms directly from the REACTOME website, then map them to mouse.
# The human-to-mouse gene mappings are pulled via biomaRt.
library(biomaRt) | |
library(data.table) | |
library(magrittr) | |
library(rtracklayer) | |
library(tidyverse) | |
# Release numbers used to build the Ensembl and REACTOME download URLs below.
ensembl_version <- 101
reactome_version <- 85

# Raise R's download timeout (in seconds) for the remote files fetched below.
options(timeout = 999)
# GTF files as gene_id to gene_name lookup.
#
# Ensembl repeats the release number in the GTF file name, so the file name is
# built from `ensembl_version` too instead of hard-coding "101"; otherwise
# changing the version requests a file that is not in that release directory.

# Download one Ensembl GTF and return a data.frame with the gene_id, gene_name
# and gene_biotype of every record whose type is "gene".
#   species_dir: directory name below ".../gtf/", e.g. "homo_sapiens"
#   file_stem:   file name up to (not including) the release number,
#                e.g. "Homo_sapiens.GRCh38"
import_gene_table <- function(species_dir, file_stem) {
  gtf_url <- paste0(
    "https://ftp.ensembl.org/pub/release-", ensembl_version, "/gtf/",
    species_dir, "/", file_stem, ".", ensembl_version, ".chr.gtf.gz"
  )
  rtracklayer::import(gtf_url) %>%
    data.frame() %>%
    dplyr::filter(type == "gene") %>%
    dplyr::select(gene_id, gene_name, gene_biotype)
}

# The mouse assembly changed from GRCm38 to GRCm39 with Ensembl release 103.
mouse_assembly <- if (ensembl_version >= 103) "GRCm39" else "GRCm38"

genes_human <- import_gene_table("homo_sapiens", "Homo_sapiens.GRCh38")
genes_mouse <- import_gene_table(
  "mus_musculus", paste0("Mus_musculus.", mouse_assembly)
)
# Open the human and mouse gene marts, both pinned to `ensembl_version`.
mart_human <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  version = ensembl_version
)
mart_mouse <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "mmusculus_gene_ensembl",
  version = ensembl_version
)

# Linked query across both marts: one row per human gene / mouse gene pair,
# carrying the Ensembl gene id and the gene symbol for each species.
human_mouse <- biomaRt::getLDS(
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  attributesL = c("ensembl_gene_id", "mgi_symbol"),
  mart = mart_human,
  martL = mart_mouse,
  uniqueRows = TRUE
)
colnames(human_mouse) <- c("human_id", "human_name", "mouse_id", "mouse_name")
human_mouse <- unique(human_mouse)
# Get REACTOME: download the zipped GMT for `reactome_version` and parse it
# into a named list (pathway name -> character vector of gene symbols).
url_reactome <- paste0("https://reactome.org/download/", reactome_version, "/ReactomePathways.gmt.zip")
temp <- tempfile(fileext = ".zip")
# The archive is binary; request a binary transfer explicitly.
download.file(url_reactome, temp, mode = "wb")
# Keep a handle on the connection so it can be destroyed explicitly: reading
# from an unopened connection closes it afterwards but does not destroy it,
# which otherwise leaks it until garbage collection warns about it.
gmt_con <- unz(temp, "ReactomePathways.gmt")
reactome_human_list <- tryCatch(
  fgsea::gmtPathways(gmt_con),
  finally = {
    # Runs on success and on error, so neither the connection nor the
    # downloaded archive is left behind when parsing fails.
    close(gmt_con)
    unlink(temp)
  }
)
# Flatten the pathway list into a long table: one row per (pathway, gene symbol).
pathway_names <- names(reactome_human_list)
pathway_tables <- lapply(pathway_names, function(pw) {
  data.frame(
    pathway = pw,
    gene_name = reactome_human_list[[pw]],
    check.names = FALSE
  )
})
reactome_long <- do.call(rbind, pathway_tables)

# Attach Ensembl gene ids by symbol. A symbol may occur in many pathways and
# may match several gene ids, hence the many-to-many relationship.
reactome_annotated <- dplyr::left_join(
  x = reactome_long,
  y = genes_human,
  by = "gene_name",
  relationship = "many-to-many"
)

# Drop rows without a usable symbol or id, label each gene as "<id>_<symbol>",
# and keep the distinct (pathway, gene) pairs.
reactome_human <- reactome_annotated %>%
  dplyr::filter(
    !is.na(gene_name), gene_name != "",
    !is.na(gene_id), gene_id != ""
  ) %>%
  dplyr::mutate(gene = paste0(gene_id, "_", gene_name)) %>%
  dplyr::select(pathway, gene) %>%
  unique()
# Map human to mouse.
# Give the mapping table the same "<id>_<symbol>" key used in `reactome_human`.
human_mouse_keyed <- dplyr::mutate(
  human_mouse,
  gene = paste0(human_id, "_", human_name)
)

# Join on that key, discard human genes without a usable mouse counterpart,
# then relabel each row with the mouse "<id>_<symbol>" and de-duplicate.
reactome_mouse <- dplyr::left_join(
  x = reactome_human,
  y = human_mouse_keyed,
  by = "gene",
  relationship = "many-to-many"
) %>%
  dplyr::filter(
    !is.na(mouse_id), !is.na(mouse_name),
    mouse_id != "", mouse_name != ""
  ) %>%
  dplyr::mutate(gene = paste0(mouse_id, "_", mouse_name)) %>%
  dplyr::select(pathway, gene) %>%
  unique()
# Write each result table to "<object name>.txt.gz" in the working directory
# as a gzip-compressed, tab-separated file with a header and no quoting.
save_that <- c("genes_human", "genes_mouse", "human_mouse", "reactome_human", "reactome_mouse")
invisible(lapply(save_that, function(object_name) {
  out_file <- paste0(object_name, ".txt.gz")
  data.table::fwrite(
    x = get(object_name),
    file = out_file,
    col.names = TRUE,
    row.names = FALSE,
    sep = "\t",
    quote = FALSE,
    compress = "gzip"
  )
  NULL
}))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment