Last active
October 22, 2019 11:12
-
-
Save egeulgen/4ad24a97dd5fdaabcf6625c1836d3042 to your computer and use it in GitHub Desktop.
M.musculus pathfindR analysis for issue 22
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
## Project: pathfindR | |
## Script purpose: Try to resolve issue 22 | |
## Date: Oct 15, 2019 | |
## Author: Ege Ulgen | |
################################################## | |
options(stringsAsFactors = FALSE) | |
# Create M.musculus KEGG Gene Sets ---------------------------------------- | |
# Obtain list of M.musculus pathways | |
pathways_list <- KEGGREST::keggList("pathway", "mmu") | |
# Turn the identifiers of KEGGREST into KEGG-style pathway identifiers | |
pathway_codes <- sub("path:", "", names(pathways_list)) | |
# Obtain and parse genes per each pathway | |
genes_by_pathway <- sapply(pathway_codes, function(pwid){ | |
pw <- KEGGREST::keggGet(pwid) | |
pw <- pw[[1]]$GENE[c(FALSE, TRUE)] ## get gene symbols, not descriptions | |
pw <- sub(";.+", "", pw) ## discard any remaining description | |
pw <- pw[grep("^[A-Za-z0-9_-]+(\\@)?$", pw)] ## remove mistaken lines that cannot be gene symbols | |
pw <- unique(toupper(pw)) ## turn to upper case and keep unique symbols | |
pw | |
}) | |
## Form the custom genes object | |
mmu_kegg_genes <- genes_by_pathway | |
mmu_kegg_genes <- mmu_kegg_genes[sapply(mmu_kegg_genes, length) != 0] | |
# saveRDS(mmu_kegg_genes, "../pathfindR/mmu_kegg_genes.RDS") | |
## Form the custom pathways object | |
mmu_kegg_pathways <- KEGGREST::keggList("pathway", "mmu") | |
names(mmu_kegg_pathways) <- sub("path:", "", names(mmu_kegg_pathways)) | |
mmu_kegg_pathways <- sub(" - Mus musculus \\(mouse\\)", "", mmu_kegg_pathways) | |
mmu_kegg_pathways <- mmu_kegg_pathways[names(mmu_kegg_pathways) %in% names(mmu_kegg_genes)] | |
# saveRDS(mmu_kegg_pathways, "../pathfindR/mmu_kegg_pathways.RDS") | |
# Process M.musculus PIN -------------------------------------------------- | |
process_pin <- function(pin_df) { | |
# remove self-interactions | |
pin_df <- pin_df[pin_df$Interactor_A != pin_df$Interactor_B, ] | |
# remove duplicated inteactions (including symmetric ones) | |
pin_df <- unique(t(apply(pin_df, 1, sort))) | |
pin_df <- as.data.frame(pin_df) | |
colnames(pin_df) <- c("Interactor_A", "Interactor_B") | |
return(pin_df) | |
} | |
## https://downloads.thebiogrid.org/BioGRID | |
biogrid_df <- read.delim("misc/issues/issue22/BIOGRID-ORGANISM-Mus_musculus-3.5.177.tab2.txt") | |
## keep only mmu - mmu interactions | |
biogrid_df <- biogrid_df[biogrid_df$Organism.Interactor.A == 10090 & ## mmu code | |
biogrid_df$Organism.Interactor.B == 10090, ] ## mmu code | |
biogrid_pin <- data.frame(Interactor_A = toupper(biogrid_df$Official.Symbol.Interactor.A), | |
Interactor_B = toupper(biogrid_df$Official.Symbol.Interactor.B)) | |
biogrid_pin <- process_pin(biogrid_pin) | |
biogrid_pin <- data.frame(A = biogrid_pin$Interactor_A, | |
pp = "pp", | |
B = biogrid_pin$Interactor_B) | |
write.table(biogrid_pin, | |
"misc/issues/issue22/mmusculusPIN.sif", | |
col.names = FALSE, | |
row.names = FALSE, | |
sep = "\t") | |
sif_path <- normalizePath("misc/issues/issue22/mmusculusPIN.sif") | |
# Run pathfindR ----------------------------------------------------------- | |
library(pathfindR) | |
input_df <- readRDS("misc/issues/issue22/input_df.RDS") | |
input_df$`Gene Symbol` <- toupper(input_df$`Gene Symbol`) | |
output_df <- run_pathfindR(input_df, | |
p_val_threshold = 0.1, | |
enrichment_threshold = 1, | |
human_genes = FALSE, | |
pin_name_path = sif_path, | |
sig_gene_thr = 2, | |
gene_sets = "Custom", | |
custom_genes = mmu_kegg_genes, | |
custom_pathways = mmu_kegg_pathways, | |
output_dir="misc/issues/issue22/mmu_out") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment