Created
April 7, 2016 09:04
-
-
Save mtmorgan/ea10d0d424bf7e414d8e064d903f026d to your computer and use it in GitHub Desktop.
Query DisGeNET disease / gene database from R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Query DisGeNET disease / gene database
#'
#' Based on a script by [email protected], retrieved from
#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
#' 2016. This version is meant for interactive use within an R
#' session, and makes a single query to DisGeNET rather than one query
#' for each input symbol.
#'
#' @param input character vector of gene or disease identifiers. Must
#'     be non-empty, contain no missing values, and contain no single
#'     quote characters (identifiers are embedded as quoted literals
#'     in the query).
#'
#' @param entity character(1) specifying that the identifiers are
#'     either "gene" (the default) or "disease".
#'
#' @param identifier character(1) specifying the type of
#'     identifier. If 'entity' is "gene", then 'identifier' can be
#'     "entrez", or "hgnc". If 'entity' is "disease", 'identifier' can
#'     be 'cui', 'mesh', or 'omim'.
#'
#' @return A data.frame parsed from the tab-delimited response, with
#'     the disease (c1), gene (c2) and score (c0) fields named in the
#'     SELECT clause of the query. A warning lists elements of 'input'
#'     that do not appear in the returned column corresponding to
#'     'identifier'; the check is skipped when the response has no
#'     such column.
#'
#' @examples
#' input <- c("CDK1", "CDK1A", "CDK2")
#' result <- DisGeNET(input, 'gene', 'hgnc')
#' head(result)
#'
DisGeNET <- function(input, entity = c("gene", "disease"), identifier) {
    ## Validate arguments before touching the network so that bad
    ## calls fail fast with a clear message.
    stopifnot(is.character(input), length(input) > 0L, !anyNA(input))
    if (any(grepl("'", input, fixed = TRUE)))
        stop("'input' must not contain single quote characters",
             call. = FALSE)
    entity <- match.arg(entity)
    stopifnot(
        is.character(identifier), length(identifier) == 1L,
        !is.na(identifier)
    )

    ## 'field' is the qualified query field used to filter and order
    ## the results.
    field <- switch(entity, gene = {
        if (!identifier %in% c("entrez", "hgnc"))
            stop("entity='gene' 'identifier' must be 'entrez' or 'hgnc'")
        if (identifier == "entrez")
            "c2.geneId"
        else                            # identifier == 'hgnc'
            "c2.name"
    }, disease = {
        if (!identifier %in% c("cui", "mesh", "omim"))
            stop("entity='disease' 'identifier' must be 'cui', 'mesh' or 'omim'")
        paste0("c1.", identifier)
    })

    if (!requireNamespace("httr", quietly = TRUE))
        stop("package 'httr' is required to query DisGeNET", call. = FALSE)

    url <- "http://www.disgenet.org/oql"
    ## All identifiers go into one IN (...) list, hence a single query.
    terms <- paste(sprintf("'%s'", input), collapse = ", ")
    oql <- paste0(
        "DEFINE
            c0='/data/gene_disease_score_onexus',
            c1='/data/diseases',
            c2='/data/genes',
            c3='/data/sources'
        ON
            'http://bitbucket.org/janis_pi/disgenet_onexus.git'
        SELECT
            c1 (cui, name, diseaseClassName, STY, MESH, omimInt),
            c2 (geneId, name, uniprotId, description, pathName, pantherName),
            c0 (score, pmids)
        FROM
            c0
        WHERE
            (c3 = 'ALL' AND ", field, " IN (", terms, "))
        ORDER BY ",
        field, ", c0.score DESC")

    response <- httr::POST(url, body = oql)
    httr::stop_for_status(response)
    ## Request the raw text explicitly; read.csv(text=) needs a
    ## character string, not an automatically parsed object.
    payload <- httr::content(response, as = "text", encoding = "UTF-8")
    tbl <- read.csv(text = payload, header = TRUE, sep = "\t")

    ## Compare 'input' against the column that was actually queried.
    ## The match is case-insensitive because the SELECT clause spells
    ## some fields differently from the filter (e.g. 'MESH').
    ## NOTE(review): for 'omim' the selected field is 'omimInt', so no
    ## column matches and the check is skipped -- confirm whether
    ## 'c1.omimInt' is comparable with the supplied identifiers.
    column <- match(tolower(field), tolower(names(tbl)))
    if (!is.na(column)) {
        bad <- !input %in% tbl[[column]]
        if (any(bad))
            warning("entities not in DisGeNET:\n  ",
                    paste(sQuote(input[bad]), collapse = ", "),
                    call. = FALSE)
    }
    tbl
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment