# go_kegg_pathways
# Gist: explodecomputer/57e83b49f6b2d71fbdb43ca823294cd9
# Created: September 28, 2020 17:25
library(dplyr)
library(org.Hs.eg.db)
# get KEGG pathways
#
# Builds a long table with one row per (gene symbol, KEGG pathway id).
# `xx` is a named list: names are KEGG pathway ids, values are the Entrez
# gene ids annotated to that pathway.
# NOTE(review): org.Hs.egPATH2EG appears to be deprecated in newer
# org.Hs.eg.db releases - confirm it exists in the installed version.
xx <- as.list(org.Hs.egPATH2EG)
keggpathways <- lapply(names(xx), function(x) {
  tibble(
    # Qualified explicitly: dplyr also exports select(), and which one a
    # bare select() resolves to depends on package load order.
    symbol = AnnotationDbi::select(
      org.Hs.eg.db,
      keys = xx[[x]],
      columns = "SYMBOL",
      keytype = "ENTREZID"
    )$SYMBOL,
    pathway = x
  )
}) %>%
  bind_rows()
# get GO pathways
# this will take a bit of time - there are 18000 GO terms or something
#
# Builds a long table with one row per (gene symbol, evidence code, GO id).
# `yy` is a named list: names are GO ids, values are Entrez gene ids whose
# element names are used as the evidence column.
yy <- as.list(org.Hs.egGO2EG)
gopathways <- lapply(names(yy), function(x) {
  tibble(
    # Qualified explicitly: dplyr also exports select(), and which one a
    # bare select() resolves to depends on package load order.
    # NOTE(review): assumes select() returns one row per supplied key, in
    # input order, so `symbol` lines up with `evidence` - confirm.
    symbol = AnnotationDbi::select(
      org.Hs.eg.db,
      keys = yy[[x]],
      columns = "SYMBOL",
      keytype = "ENTREZID"
    )$SYMBOL,
    evidence = names(yy[[x]]),
    pathway = x
  )
}) %>%
  bind_rows()
# create some example data
# two protein pairs that are in the same pathways, one which isn't
#
# Each row is one protein pair: pqtl1 and pqtl2 hold the gene symbols of the
# two members. stringsAsFactors = FALSE keeps the symbols as character on
# R < 4.0 as well, so they compare cleanly against character symbol columns.
pqtl_data <- data.frame(
  pqtl1 = c("A2M", "CYP3A4", "abc"),
  pqtl2 = c("SERPINC1", "TYMP", "cde"),
  stringsAsFactors = FALSE
)
# function to get overlaps
#
# Counts the distinct (pqtl1, pqtl2) pairs whose two members share at least
# one pathway.
#
# Arguments:
#   pqtl_data   data frame with columns `pqtl1` and `pqtl2` (gene symbols).
#   pathwaydata data frame with columns `symbol` and `pathway`; any other
#               columns are ignored.
# Returns:
#   A single integer: the number of distinct pairs sharing a pathway.
#
# Each symbol's pathway set is looked up once and the two sets are
# intersected per pair. This gives the same count as joining the pathway
# table onto both columns and filtering for equal pathways, without
# materialising the many-to-many join. Rows with a missing symbol or pathway
# never count as a match.
get_joint_pathways <- function(pqtl_data, pathwaydata) {
  stopifnot(
    all(c("pqtl1", "pqtl2") %in% names(pqtl_data)),
    all(c("symbol", "pathway") %in% names(pathwaydata))
  )

  # Pathway ids per symbol, ignoring incomplete annotation rows.
  symbol <- as.character(pathwaydata$symbol)
  pathway <- as.character(pathwaydata$pathway)
  complete <- !is.na(symbol) & !is.na(pathway)
  pathway_sets <- split(pathway[complete], symbol[complete])

  # De-duplicate on the pair itself rather than on a pasted key, so symbols
  # containing the separator cannot collide.
  pairs <- unique(data.frame(
    first = as.character(pqtl_data$pqtl1),
    second = as.character(pqtl_data$pqtl2),
    stringsAsFactors = FALSE
  ))

  shares_pathway <- vapply(
    seq_len(nrow(pairs)),
    function(i) {
      first <- pairs$first[i]
      second <- pairs$second[i]
      if (is.na(first) || is.na(second)) {
        return(FALSE)
      }
      # Unknown symbols index to NULL, which intersects to length zero.
      length(intersect(pathway_sets[[first]], pathway_sets[[second]])) > 0L
    },
    logical(1)
  )

  sum(shares_pathway)
}
# Observed number of example pairs that share a KEGG pathway.
get_joint_pathways(pqtl_data, keggpathways)
# function to do permutations
#
# Null distribution of the shared-pathway count: both symbol columns are
# shuffled repeatedly and, after each shuffle, the distinct (pqtl1, pqtl2)
# pairs sharing at least one pathway are counted.
#
# Arguments:
#   pqtl_data   data frame with columns `pqtl1` and `pqtl2` (gene symbols).
#   pathwaydata data frame with columns `symbol` and `pathway`; any other
#               columns are ignored.
#   nperm       number of permutations. When NULL (the default) nothing is
#               shuffled and the observed count is returned, so two-argument
#               calls keep working after this definition replaces the
#               two-argument function of the same name.
# Returns:
#   An integer vector of length `nperm` (one count per permutation), or a
#   single integer when `nperm` is NULL.
#
# Shuffles are cumulative: each iteration permutes the previous iteration's
# columns. Results depend on the RNG state; call set.seed() first for
# reproducibility.
get_joint_pathways <- function(pqtl_data, pathwaydata, nperm = NULL) {
  stopifnot(
    all(c("pqtl1", "pqtl2") %in% names(pqtl_data)),
    all(c("symbol", "pathway") %in% names(pathwaydata))
  )

  # Built once, outside the loop: pathway ids per symbol, ignoring
  # incomplete annotation rows.
  symbol <- as.character(pathwaydata$symbol)
  pathway <- as.character(pathwaydata$pathway)
  complete <- !is.na(symbol) & !is.na(pathway)
  pathway_sets <- split(pathway[complete], symbol[complete])

  # Number of distinct (first, second) pairs with a non-empty pathway overlap.
  count_shared <- function(first, second) {
    keep <- !duplicated(data.frame(first, second, stringsAsFactors = FALSE))
    first <- first[keep]
    second <- second[keep]
    shares_pathway <- vapply(
      seq_along(first),
      function(i) {
        if (is.na(first[i]) || is.na(second[i])) {
          return(FALSE)
        }
        # Unknown symbols index to NULL, which intersects to length zero.
        overlap <- intersect(
          pathway_sets[[first[i]]],
          pathway_sets[[second[i]]]
        )
        length(overlap) > 0L
      },
      logical(1)
    )
    sum(shares_pathway)
  }

  first <- as.character(pqtl_data$pqtl1)
  second <- as.character(pqtl_data$pqtl2)

  if (is.null(nperm)) {
    return(count_shared(first, second))
  }

  stopifnot(
    is.numeric(nperm),
    length(nperm) == 1L,
    !is.na(nperm),
    nperm >= 0
  )

  counts <- integer(nperm)
  # seq_len() so that nperm = 0 runs zero iterations (1:0 would run two).
  for (i in seq_len(nperm)) {
    first <- sample(first)
    second <- sample(second)
    counts[i] <- count_shared(first, second)
  }
  counts
}
# (GitHub page footer - sign-up / sign-in prompt for commenting - not part of the script.)