Skip to content

Instantly share code, notes, and snippets.

@njahn82
Last active August 29, 2015 14:25
Show Gist options
  • Save njahn82/39f99e3b33ce3cb30c54 to your computer and use it in GitHub Desktop.
Save njahn82/39f99e3b33ce3cb30c54 to your computer and use it in GitHub Desktop.

# Load the helper file that is expected to define epmc_search().
source("epmc_search.R")
# Free-text search: no `field` is given, so the query string is sent as is.
my_data <- epmc_search(query='gabi-kat')  
# Print the data frame of matching records.
my_data$data
#> Source: local data frame [207 x 26]
#> 
#>          id source     pmid      pmcid
#> 1  25262228    MED 25262228 PMC4265151
#> 2  25779053    MED 25779053 PMC4402529
#> 3  25608465    MED 25608465 PMC4302306
#> 4  25316063    MED 25316063 PMC4265157
#> 5  26019639    MED 26019639 PMC4433792
#> 6  25933420    MED 25933420 PMC4416716
#> 7  25569773    MED 25569773 PMC4287526
#> 8  25806948    MED 25806948 PMC4373807
#> 9  25646734    MED 25646734 PMC4344464
#> 10 25950582    MED 25950582 PMC4423837
#> ..      ...    ...      ...        ...
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#>   issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#>   pageInfo (chr), pubType (chr), isOpenAccess (chr), inEPMC (chr), inPMC
#>   (chr), citedByCount (int), hasReferences (chr), hasTextMinedTerms (chr),
#>   hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#>   (chr), luceneScore (chr), doi (chr), tmAccessionTypeList.accessionType
#>   (chr), dbCrossReferenceList.dbName (chr)

# Look up one record by its PubMed ID; `field` is prefixed to the query,
# so the request is made for "EXT_ID:22246381".
epmc_search(query='22246381', field='EXT_ID')  
#> $query
#> [1] "EXT_ID:22246381"
#> 
#> $hitCount
#> [1] 1
#> 
#> $data
#> Source: local data frame [1 x 22]
#> 
#>         id source     pmid
#> 1 22246381    MED 22246381
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#>   issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#>   pageInfo (chr), pubType (chr), inEPMC (chr), inPMC (chr), citedByCount
#>   (int), hasReferences (chr), hasTextMinedTerms (chr),
#>   hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#>   (chr), luceneScore (chr), doi (chr)

# Get article metadata for PLOS Medicine by ISSN and examine the yearly
# distribution of PLOS Medicine publications. `pages = 150` raises the page
# limit (25 records per page) so that all hits can be retrieved.
my_data <- epmc_search(query='1549-1676', field='ISSN', pages = 150)  
# Print the whole result list: query, hit count and records.
my_data
#> $query
#> [1] "ISSN:1549-1676"
#> 
#> $hitCount
#> [1] 2889
#> 
#> $data
#> Source: local data frame [2,889 x 26]
#> 
#>          id source     pmid      pmcid
#> 1  25562846    MED 25562846 PMC4285396
#> 2  25562317    MED 25562317 PMC4285401
#> 3  25849433    MED 25849433 PMC4388663
#> 4  26030872    MED 26030872 PMC4452696
#> 5  25668320    MED 25668320 PMC4323109
#> 6  25826682    MED 25826682 PMC4380414
#> 7  25757228    MED 25757228 PMC4355406
#> 8  25826379    MED 25826379 PMC4380465
#> 9  25803642    MED 25803642 PMC4371888
#> 10 25689460    MED 25689460 PMC4331559
#> ..      ...    ...      ...        ...
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#>   issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#>   pageInfo (chr), pubType (chr), isOpenAccess (chr), inEPMC (chr), inPMC
#>   (chr), citedByCount (int), hasReferences (chr), hasTextMinedTerms (chr),
#>   hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#>   (chr), luceneScore (chr), doi (chr), tmAccessionTypeList.accessionType
#>   (chr), dbCrossReferenceList.dbName (chr)
# pubYear is returned as a character column, so format() does not turn it
# into a date; the values are simply tabulated as year strings below.
my_data$data$pubYear <- format(my_data$data$pubYear, format = "%Y")
# Count the number of records per publication year.
tt <- as.data.frame(table(my_data$data$pubYear))
# Plot the yearly counts. library() fails loudly when ggplot2 is missing,
# whereas require() would only return FALSE and let ggplot() fail later.
library(ggplot2)
ggplot(tt, aes(Var1, Freq, group = 1)) +
  geom_line() +
  geom_point() +
  xlab("Year published") +
  ylab("PLOS Medicine articles registered in Europe PMC")

plot of chunk unnamed-chunk-2

```{r, echo = FALSE}
# Prefix the figure paths written to the rendered document with "/".
knitr::opts_knit$set(base.url = "/")

# Defaults applied to every chunk: show the code, merge code and output into
# one block, mark output lines with "#>", and hide messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  fig.path = "",
  fig.width = 9,
  fig.height = 6
)

# Number printing: default scientific-notation penalty, two significant digits.
options(scipen = 0, digits = 2)

# Inline R results are formatted with a space as the thousands separator.
knitr::knit_hooks$set(inline = function(x) prettyNum(x, big.mark = " "))
```
##
```{r}
# Load the helper file that is expected to define epmc_search().
source("epmc_search.R")

# Free-text search and a look at the returned records.
my_data <- epmc_search(query = "gabi-kat")
my_data$data

# Look up one record by its PubMed ID.
epmc_search(query = "22246381", field = "EXT_ID")

# Get article metadata for PLOS Medicine by ISSN and examine the yearly
# distribution of PLOS Medicine publications.
my_data <- epmc_search(query = "1549-1676", field = "ISSN", pages = 150)
my_data

# pubYear is returned as a character column, so format() does not turn it
# into a date; the values are simply tabulated as year strings below.
my_data$data$pubYear <- format(my_data$data$pubYear, format = "%Y")

# Count the number of records per publication year.
tt <- as.data.frame(table(my_data$data$pubYear))

# Plot the yearly counts. library() fails loudly when ggplot2 is missing,
# whereas require() would only return FALSE and let ggplot() fail later.
library(ggplot2)
ggplot(tt, aes(Var1, Freq, group = 1)) +
  geom_line() +
  geom_point() +
  xlab("Year published") +
  ylab("PLOS Medicine articles registered in Europe PMC")
```
#' Search Europe PMC publication database
#'
#' @description This is the main function to search
#' Europe PMC RESTful Web Service (\url{http://europepmc.org/RestfulWebService})
#'
#' @import httr dplyr jsonlite
#'
#' @author Najko Jahn \email{najko.jahn@@uni-bielefeld.de}
#'
#' @param query search query (character string). See also
#'   \url{http://europepmc.org/Help}
#' @param field refine by Europe PMC search field. When given, the request is
#'   made for \code{field:query}.
#' @param pages Maximum number of pages to be returned. Each page returns 25
#'   records. Default is 10, i.e. at most 250 records will be returned.
#'   \url{http://europepmc.org/Help}
#'
#' @return A list with three elements: \code{query} (the query as sent,
#'   including the field prefix), \code{hitCount} (number of hits reported by
#'   the service) and \code{data} (the retrieved records, row-bound with
#'   \code{dplyr::bind_rows}). A warning is raised when there are no hits.
#'
#' @examples \dontrun{
#' #Search articles for 'Gabi-Kat'
#' my.data <- epmc_search(query='Gabi-Kat')
#'
#' #Get article metadata by DOI
#' my.data <- epmc_search(query='10.1007/bf00197367', field='DOI')
#'
#' #Get article metadata by PubMed ID (PMID)
#' my.data <- epmc_search(query='22246381', field='EXT_ID')
#'
#' #Get only PLOS Genetics article with EMBL database references
#' my.data <- epmc_search(query='ISSN:1553-7404 HAS_EMBL:y')
#'
#' #Get article metadata for PLOS Medicine by ISSN and examine yearly
#' #distribution of PLOS Medicine publications
#' my_data <- epmc_search(query='1549-1676', field='ISSN')
#' my_data
#' #pubYear is a character column; tabulate it as year strings
#' my_data$data$pubYear <- format(my_data$data$pubYear, format='%Y')
#' #summary table
#' tt <- as.data.frame(table(my_data$data$pubYear))
#' #plot
#' library(ggplot2)
#' ggplot(tt, aes(Var1, Freq, group = 1)) +
#'   geom_line() +
#'   geom_point() +
#'   xlab("Year published") +
#'   ylab("PLOS Medicine articles registered in Europe PMC")
#' }
#'
#' @export
#'
epmc_search <- function(query = NULL, field = NULL, pages = 10) {
  # check
  if (is.null(query))
    stop("No query provided")
  # construct query
  if (!is.null(field)) {
    query <- paste0(field, ":", query)
  }
  # build path; characters that are not valid in a URL (e.g. the space in
  # "ISSN:1553-7404 HAS_EMBL:y") are percent-encoded for the request only,
  # the readable query is what gets returned to the caller
  path <- paste0("search/query=", utils::URLencode(query), "&format=json")
  # first request, used to learn the total number of hits
  doc <- rebi_GET(path = path)
  hitCount <- doc$hitCount
  if (is.null(hitCount))
    stop("Europe PMC response did not contain a hit count", call. = FALSE)
  if (hitCount == 0)
    warning(sprintf("There are no citations matching your query: %s", query))
  # control pageing: fetch no more records than exist and no more than the
  # caller's page limit allows (25 records per page), so that no requests
  # are spent on pages that are known to be empty
  records <- min(hitCount, pages * 25)
  if (records > 0) {
    pg <- rebi_pageing(path, hitCount = records)
  } else {
    pg <- character(0)
  }
  doc <- lapply(pg, rebi_request_page)
  # return
  list(
    query = query, hitCount = hitCount, data = dplyr::bind_rows(doc)
  )
}
#' Implementing GET method and json parser for EPMC
#'
#' @param path request path that is appended to the Europe PMC REST base URI
#' @param ... further arguments passed on to \code{httr::GET}
#' @return the response body parsed with \code{jsonlite::fromJSON}
#'   (\code{flatten = TRUE}). Stops when \code{path} is missing or when the
#'   body cannot be parsed as JSON; warns on HTTP error status codes.
rebi_GET <- function(path = NULL, ...) {
  if (is.null(path))
    stop("Nothing to parse")
  uri <- "http://www.ebi.ac.uk/europepmc/webservices/rest/"
  u <- paste0(uri, path)
  # call api
  req <- httr::GET(u, ...)
  # check for http status
  httr::warn_for_status(req)
  # load json into r
  out <- httr::content(req, "text")
  # A body that is not JSON (e.g. an HTML error page) makes the parser fail;
  # report that with the intended message rather than a raw parser error.
  tryCatch(
    jsonlite::fromJSON(out, flatten = TRUE),
    error = function(e) {
      stop("No json to parse: ", conditionMessage(e), call. = FALSE)
    }
  )
}
#' Request one result page and turn its result list into a data frame
#'
#' @param x request path of the page, handed to \code{rebi_GET}
#' @return (invisibly) a data frame built by applying \code{data.frame} to
#'   each element of the response's \code{resultList} and row-binding the
#'   pieces; strings are kept as character
rebi_request_page <- function(x) {
  page <- rebi_GET(x)
  records <- plyr::ldply(
    page$resultList,
    data.frame,
    stringsAsFactors = FALSE,
    .id = NULL
  )
  invisible(records)
}
#' Calculate pages. Each page consists of 25 records.
#'
#' @param path request path to which the page parameter is appended
#' @param hitCount number of records that should be covered by the pages
#' @return character vector with one \code{"<path>&page=<n>"} entry per page
#'   needed to cover \code{hitCount} records; \code{character(0)} when
#'   \code{hitCount} is zero (or negative)
rebi_pageing <- function(path, hitCount) {
  # ceiling() rounds a partly filled last page up to a full page. seq_len()
  # gives an empty sequence for zero pages, whereas 1:0 would count down
  # and produce requests for page 1 and page 0.
  n_pages <- max(0, ceiling(hitCount / 25))
  pages <- seq_len(n_pages)
  sprintf("%s&page=%s", path, pages)
}
#' Check for API availability
#'
#' @param req response object with a \code{status_code} element, such as the
#'   value returned by \code{httr::GET}
#' @return invisible \code{NULL} when the status code is below 400; otherwise
#'   the function stops with the message belonging to the HTTP status
rebi_check <- function(req) {
  if (req$status_code < 400)
    return(invisible())
  # Look the message up for the response that was passed in; the call is
  # namespaced like the other httr calls in this file.
  stop(httr::http_status(req)$message, "\n", call. = FALSE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment