Skip to content

Instantly share code, notes, and snippets.

@egeulgen
Last active October 22, 2019 11:12
Show Gist options
  • Save egeulgen/4ad24a97dd5fdaabcf6625c1836d3042 to your computer and use it in GitHub Desktop.
Save egeulgen/4ad24a97dd5fdaabcf6625c1836d3042 to your computer and use it in GitHub Desktop.
M.musculus pathfindR analysis for issue 22
##################################################
## Project: pathfindR
## Script purpose: Try to resolve issue 22
## Date: Oct 15, 2019
## Author: Ege Ulgen
##################################################
options(stringsAsFactors = FALSE)
# Create M.musculus KEGG Gene Sets ----------------------------------------
# Obtain list of M.musculus pathways
pathways_list <- KEGGREST::keggList("pathway", "mmu")
# Turn the identifiers of KEGGREST into KEGG-style pathway identifiers
pathway_codes <- sub("path:", "", names(pathways_list))
# Obtain and parse genes per each pathway
genes_by_pathway <- sapply(pathway_codes, function(pwid){
pw <- KEGGREST::keggGet(pwid)
pw <- pw[[1]]$GENE[c(FALSE, TRUE)] ## get gene symbols, not descriptions
pw <- sub(";.+", "", pw) ## discard any remaining description
pw <- pw[grep("^[A-Za-z0-9_-]+(\\@)?$", pw)] ## remove mistaken lines that cannot be gene symbols
pw <- unique(toupper(pw)) ## turn to upper case and keep unique symbols
pw
})
## Form the custom genes object
mmu_kegg_genes <- genes_by_pathway
mmu_kegg_genes <- mmu_kegg_genes[sapply(mmu_kegg_genes, length) != 0]
# saveRDS(mmu_kegg_genes, "../pathfindR/mmu_kegg_genes.RDS")
## Form the custom pathways object
mmu_kegg_pathways <- KEGGREST::keggList("pathway", "mmu")
names(mmu_kegg_pathways) <- sub("path:", "", names(mmu_kegg_pathways))
mmu_kegg_pathways <- sub(" - Mus musculus \\(mouse\\)", "", mmu_kegg_pathways)
mmu_kegg_pathways <- mmu_kegg_pathways[names(mmu_kegg_pathways) %in% names(mmu_kegg_genes)]
# saveRDS(mmu_kegg_pathways, "../pathfindR/mmu_kegg_pathways.RDS")
# Process M.musculus PIN --------------------------------------------------
process_pin <- function(pin_df) {
# remove self-interactions
pin_df <- pin_df[pin_df$Interactor_A != pin_df$Interactor_B, ]
# remove duplicated inteactions (including symmetric ones)
pin_df <- unique(t(apply(pin_df, 1, sort)))
pin_df <- as.data.frame(pin_df)
colnames(pin_df) <- c("Interactor_A", "Interactor_B")
return(pin_df)
}
## https://downloads.thebiogrid.org/BioGRID
biogrid_df <- read.delim("misc/issues/issue22/BIOGRID-ORGANISM-Mus_musculus-3.5.177.tab2.txt")
## keep only mmu - mmu interactions
biogrid_df <- biogrid_df[biogrid_df$Organism.Interactor.A == 10090 & ## mmu code
biogrid_df$Organism.Interactor.B == 10090, ] ## mmu code
biogrid_pin <- data.frame(Interactor_A = toupper(biogrid_df$Official.Symbol.Interactor.A),
Interactor_B = toupper(biogrid_df$Official.Symbol.Interactor.B))
biogrid_pin <- process_pin(biogrid_pin)
biogrid_pin <- data.frame(A = biogrid_pin$Interactor_A,
pp = "pp",
B = biogrid_pin$Interactor_B)
write.table(biogrid_pin,
"misc/issues/issue22/mmusculusPIN.sif",
col.names = FALSE,
row.names = FALSE,
sep = "\t")
sif_path <- normalizePath("misc/issues/issue22/mmusculusPIN.sif")
# Run pathfindR -----------------------------------------------------------
library(pathfindR)
input_df <- readRDS("misc/issues/issue22/input_df.RDS")
input_df$`Gene Symbol` <- toupper(input_df$`Gene Symbol`)
output_df <- run_pathfindR(input_df,
p_val_threshold = 0.1,
enrichment_threshold = 1,
human_genes = FALSE,
pin_name_path = sif_path,
sig_gene_thr = 2,
gene_sets = "Custom",
custom_genes = mmu_kegg_genes,
custom_pathways = mmu_kegg_pathways,
output_dir="misc/issues/issue22/mmu_out")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment