Skip to content

Instantly share code, notes, and snippets.

@oganm
Created April 19, 2023 22:09
Show Gist options
  • Save oganm/57b73c912753210db42138de7ab54ca2 to your computer and use it in GitHub Desktop.
Save oganm/57b73c912753210db42138de7ab54ca2 to your computer and use it in GitHub Desktop.
Term replacement
library(RCurl)
library(xml2)
library(dplyr)
library(stringr)
# Load the table of obsolete terms; the replacement step further down reads
# its `ValueUri` column.
obsolete <- readr::read_tsv("obsolete-terms.txt")

# get our ontology files
# `ontology_server` and `ontology_path` are not defined in this script and must
# already exist in the session before it is run.
ontologies <- system2(
  "ssh",
  paste0(ontology_server, " ls ", ontology_path),
  stdout = TRUE
)

# With stdout = TRUE, system2() reports a non-zero exit code through the
# "status" attribute of its result. Check it here, before subsetting drops
# the attribute, so a failed listing is not mistaken for a short one.
ssh_status <- attr(ontologies, "status")
if (!is.null(ssh_status) && ssh_status != 0) {
  stop(
    "Listing ", ontology_path, " on ", ontology_server,
    " failed with exit status ", ssh_status,
    call. = FALSE
  )
}

# Drop entries whose name contains "tmp" or "nifstd".
ontologies <- ontologies[!grepl("tmp|nifstd", ontologies)]

ontology_dir <- "data-raw/ontologies"
dir.create(ontology_dir, recursive = TRUE, showWarnings = FALSE)

# Keep only the ontologies that already have a local copy, then re-fetch them.
# On a fresh checkout the local directory is empty; applying the restriction
# there would select nothing and make the rest of the script a silent no-op,
# so in that case the full remote listing is downloaded instead.
# NOTE(review): presumably the local directory acts as a curated subset of the
# remote listing - confirm that is the intent of this filter.
has_local_copy <- ontologies %in% list.files(ontology_dir)
if (any(has_local_copy)) {
  ontologies <- ontologies[has_local_copy]
}

# Copy each selected file from the server into the local directory. A plain
# loop is used because only the side effect matters.
for (ontology_file in ontologies) {
  writeBin(
    RCurl::scp(ontology_server, file.path(ontology_path, ontology_file)),
    file.path(ontology_dir, ontology_file)
  )
}
# find listed alternatives
# For every downloaded ontology file, build a list with one entry per
# top-level `Class` element. Each entry is named by the element's `about`
# attribute and holds the text of its `hasAlternativeId` children (an empty
# character vector when there are none). The outer list is named by file.
alternative_terms <- lapply(ontologies, function(ontology_file) {
  # Progress output: parsing a large file can take a while.
  print(ontology_file)
  onto <- xml2::read_xml(file.path("data-raw/ontologies", ontology_file))

  # Only direct children of the document root are inspected.
  top_nodes <- xml2::xml_children(onto)
  classes <- top_nodes[xml2::xml_name(top_nodes) == "Class"]

  alternative_ids <- lapply(classes, function(class_node) {
    fields <- xml2::xml_children(class_node)
    xml2::xml_text(fields[xml2::xml_name(fields) == "hasAlternativeId"])
  })
  names(alternative_ids) <- xml2::xml_attr(classes, "about")
  alternative_ids
})
names(alternative_terms) <- ontologies
# Flatten the per-ontology lists into two parallel vectors of equal length:
# `to_replace[i]` is an alternative id and `alternatives[i]` is the name of the
# class entry that listed it.
# Each class name is repeated once per alternative id it carries, so classes
# without alternatives contribute nothing to either vector. lengths() always
# returns an integer vector, including for an ontology with no classes.
alternatives <- unlist(lapply(alternative_terms, \(x) rep(names(x), lengths(x))))
to_replace <- unlist(lapply(alternative_terms, \(x) unlist(x)))
# find replacements
# Take whatever follows "/obo/", "/efo/" or "/ORDO/" in each URI and turn its
# first underscore into a colon, so it can be compared with the alternative
# ids collected in `to_replace`.
# The compact names are kept in a local vector rather than as a temporary
# column, so an input column that happens to be called `compact_names` is
# neither overwritten nor dropped.
compact_names <- stringr::str_replace(
  stringr::str_extract(
    obsolete$ValueUri,
    "(?<=((/obo)|(/efo)|(/ORDO))/).*"
  ),
  "_",
  ":"
)

# match() gives the first position of each compact name in `to_replace`;
# terms with no listed alternative get NA.
obsolete$replacements <- alternatives[match(compact_names, to_replace)]

readr::write_tsv(obsolete, file = "obsolete-term-replacements.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment