source("epmc_search.R")
my_data <- epmc_search(query='gabi-kat')
my_data$data
#> Source: local data frame [207 x 26]
#>
#> id source pmid pmcid
#> 1 25262228 MED 25262228 PMC4265151
#> 2 25779053 MED 25779053 PMC4402529
#> 3 25608465 MED 25608465 PMC4302306
#> 4 25316063 MED 25316063 PMC4265157
#> 5 26019639 MED 26019639 PMC4433792
#> 6 25933420 MED 25933420 PMC4416716
#> 7 25569773 MED 25569773 PMC4287526
#> 8 25806948 MED 25806948 PMC4373807
#> 9 25646734 MED 25646734 PMC4344464
#> 10 25950582 MED 25950582 PMC4423837
#> .. ... ... ... ...
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#> issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#> pageInfo (chr), pubType (chr), isOpenAccess (chr), inEPMC (chr), inPMC
#> (chr), citedByCount (int), hasReferences (chr), hasTextMinedTerms (chr),
#> hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#> (chr), luceneScore (chr), doi (chr), tmAccessionTypeList.accessionType
#> (chr), dbCrossReferenceList.dbName (chr)
epmc_search(query='22246381', field='EXT_ID')
#> $query
#> [1] "EXT_ID:22246381"
#>
#> $hitCount
#> [1] 1
#>
#> $data
#> Source: local data frame [1 x 22]
#>
#> id source pmid
#> 1 22246381 MED 22246381
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#> issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#> pageInfo (chr), pubType (chr), inEPMC (chr), inPMC (chr), citedByCount
#> (int), hasReferences (chr), hasTextMinedTerms (chr),
#> hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#> (chr), luceneScore (chr), doi (chr)
#Get article metadata for PLOS Medicine by ISSN and examine yearly distribution of PLOS Medicine publications
my_data <- epmc_search(query='1549-1676', field='ISSN', pages = 150)
my_data
#> $query
#> [1] "ISSN:1549-1676"
#>
#> $hitCount
#> [1] 2889
#>
#> $data
#> Source: local data frame [2,889 x 26]
#>
#> id source pmid pmcid
#> 1 25562846 MED 25562846 PMC4285396
#> 2 25562317 MED 25562317 PMC4285401
#> 3 25849433 MED 25849433 PMC4388663
#> 4 26030872 MED 26030872 PMC4452696
#> 5 25668320 MED 25668320 PMC4323109
#> 6 25826682 MED 25826682 PMC4380414
#> 7 25757228 MED 25757228 PMC4355406
#> 8 25826379 MED 25826379 PMC4380465
#> 9 25803642 MED 25803642 PMC4371888
#> 10 25689460 MED 25689460 PMC4331559
#> .. ... ... ... ...
#> Variables not shown: title (chr), authorString (chr), journalTitle (chr),
#> issue (chr), journalVolume (chr), pubYear (chr), journalIssn (chr),
#> pageInfo (chr), pubType (chr), isOpenAccess (chr), inEPMC (chr), inPMC
#> (chr), citedByCount (int), hasReferences (chr), hasTextMinedTerms (chr),
#> hasDbCrossReferences (chr), hasLabsLinks (chr), hasTMAccessionNumbers
#> (chr), luceneScore (chr), doi (chr), tmAccessionTypeList.accessionType
#> (chr), dbCrossReferenceList.dbName (chr)
# pubYear is a character column (see "pubYear (chr)" above); format() keeps
# the year strings as text, it does not convert them to Date values
my_data$data$pubYear <- format(my_data$data$pubYear, format = "%Y")
# summary table: number of records per publication year
tt <- as.data.frame(table(my_data$data$pubYear))
# plot
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the ggplot() call below fail instead
library(ggplot2)
ggplot(tt, aes(Var1, Freq, group = 1)) +
  geom_line() +
  geom_point() +
  xlab("Year published") +
  ylab("PLOS Medicine articles registered in Europe PMC")
Last active
August 29, 2015 14:25
-
-
Save njahn82/39f99e3b33ce3cb30c54 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```{r, echo = FALSE} | |
# Package-level knitr option: base URL used when knitting.
knitr::opts_knit$set(base.url = "/")

# Defaults applied to every chunk of this document.
knitr::opts_chunk$set(
  echo = TRUE,
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  fig.path = "",
  fig.width = 9,
  fig.height = 6
)

# Number formatting for printed values.
options(digits = 2, scipen = 0)

# Inline R results are printed with a space as thousands separator.
knitr::knit_hooks$set(
  inline = function(x) prettyNum(x, big.mark = " ")
)
``` | |
## | |
```{r} | |
source("epmc_search.R")

# Free-text search; print only the result table
my_data <- epmc_search(query = "gabi-kat")
my_data$data

# Restrict the search to one field (here: EXT_ID)
epmc_search(query = "22246381", field = "EXT_ID")

# Get article metadata for PLOS Medicine by ISSN and examine the yearly
# distribution of PLOS Medicine publications
my_data <- epmc_search(query = "1549-1676", field = "ISSN", pages = 150)
my_data

# pubYear is returned as a character column; format() keeps the year strings
# as text, it does not convert them to Date values
my_data$data$pubYear <- format(my_data$data$pubYear, format = "%Y")
# summary table: number of records per publication year
tt <- as.data.frame(table(my_data$data$pubYear))
# plot
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the ggplot() call below fail instead
library(ggplot2)
ggplot(tt, aes(Var1, Freq, group = 1)) +
  geom_line() +
  geom_point() +
  xlab("Year published") +
  ylab("PLOS Medicine articles registered in Europe PMC")
``` |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Search Europe PMC publication database
#'
#' @description This is the main function to search the
#' Europe PMC RESTful Web Service
#' (\url{http://europepmc.org/RestfulWebService}).
#'
#' @import httr dplyr jsonlite
#'
#' @author Najko Jahn \email{najko.jahn@@uni-bielefeld.de}
#'
#' @param query search query (character vector). See also
#'   \url{http://europepmc.org/Help}
#' @param field refine by Europe PMC search field. When given, the request
#'   is sent as \code{field:query}.
#' @param pages Maximum number of pages to be requested. Each page returns
#'   25 records. Default is 10, i.e. at most 250 records will be returned.
#'   Fewer pages are requested when the query has fewer hits.
#'
#' @return A list with three elements: \code{query} (the query string
#'   including the field prefix), \code{hitCount} (number of hits reported
#'   by the service) and \code{data} (the retrieved records, combined with
#'   \code{dplyr::bind_rows}).
#'
#' @examples \dontrun{
#' #Search articles for 'Gabi-Kat'
#' my.data <- epmc_search(query='Gabi-Kat')
#'
#' #Get article metadata by DOI
#' my.data <- epmc_search(query='10.1007/bf00197367', field='DOI')
#'
#' #Get article metadata by PubMed ID (PMID)
#' my.data <- epmc_search(query='22246381', field='EXT_ID')
#'
#' #Get only PLOS Genetics article with EMBL database references
#' my.data <- epmc_search(query='ISSN:1553-7404 HAS_EMBL:y')
#'
#' #Get article metadata for PLOS Medicine by ISSN and examine yearly
#' #distribution of PLOS Medicine publications
#' my_data <- epmc_search(query='1549-1676', field='ISSN')
#' my_data
#' #pubYear is a character column; format() keeps it as text
#' my_data$data$pubYear <- format(my_data$data$pubYear, format='%Y')
#' #summary table
#' tt <- as.data.frame(table(my_data$data$pubYear))
#' #plot
#' library(ggplot2)
#' ggplot(tt, aes(Var1, Freq, group = 1)) +
#'   geom_line() +
#'   geom_point() +
#'   xlab("Year published") +
#'   ylab("PLOS Medicine articles registered in Europe PMC")
#' }
#'
#' @export
#'
epmc_search <- function(query = NULL, field = NULL, pages = 10) {
  # check
  if (is.null(query)) {
    stop("No query provided")
  }
  # construct query, e.g. "ISSN:1549-1676"
  if (!is.null(field)) {
    query <- paste0(field, ":", query)
  }
  # build path; characters that are not allowed in a URL (e.g. spaces) are
  # percent-encoded, reserved characters such as ":" are left untouched and
  # an already encoded query is not encoded twice
  path <- paste0("search/query=", utils::URLencode(query), "&format=json")
  # first request is only used to learn how many records match
  doc <- rebi_GET(path = path)
  hitCount <- doc$hitCount
  if (is.null(hitCount)) {
    stop("Europe PMC response did not contain a hit count", call. = FALSE)
  }
  if (hitCount == 0) {
    warning(sprintf("There are no citations matching your query: %s", query))
    # nothing to page through, so skip further requests
    return(
      list(query = query, hitCount = hitCount, data = dplyr::bind_rows(list()))
    )
  }
  # control pageing: never ask for more records than the query has hits,
  # and never for more than the `pages` limit allows (25 records per page)
  records <- min(hitCount, pages * 25)
  pg <- rebi_pageing(path, hitCount = records)
  doc <- lapply(pg, rebi_request_page)
  # return
  list(
    query = query, hitCount = hitCount, data = dplyr::bind_rows(doc)
  )
}
#' Implementing GET method and json parser for EPMC
#'
#' @param path request path that is appended to the Europe PMC REST base URL
#' @param ... further arguments passed on to \code{httr::GET}
#' @return the response body parsed with \code{jsonlite::fromJSON}
#'   (\code{flatten = TRUE})
rebi_GET <- function(path = NULL, ...) {
  if (is.null(path)) {
    stop("Nothing to parse")
  }
  uri <- "http://www.ebi.ac.uk/europepmc/webservices/rest/"
  u <- paste0(uri, path)
  # call api
  req <- httr::GET(u, ...)
  # check for http status (warns only, the body is still inspected below)
  httr::warn_for_status(req)
  # load json into r; the encoding is stated explicitly so httr does not
  # have to guess it
  out <- httr::content(req, "text", encoding = "UTF-8")
  if (length(out) != 1L || is.na(out) || !nzchar(out)) {
    stop("No json to parse", call. = FALSE)
  }
  # a body that is not valid JSON is reported with the same message
  tryCatch(
    jsonlite::fromJSON(out, flatten = TRUE),
    error = function(e) {
      stop("No json to parse: ", conditionMessage(e), call. = FALSE)
    }
  )
}
#' Load the result list of one page from json as a data frame
#'
#' @param x request path of a single result page, passed to \code{rebi_GET}
#' @return the \code{resultList} element of the response, converted with
#'   \code{plyr::ldply} (strings are kept as character, no id column)
rebi_request_page <- function(x) {
  doc <- rebi_GET(x)
  # returned directly, so the result is visible instead of being the
  # invisible value of an assignment
  plyr::ldply(doc$resultList, data.frame, stringsAsFactors = FALSE, .id = NULL)
}
#' Calculate pages. Each page consists of 25 records.
#'
#' @param path request path the page parameter is appended to
#' @param hitCount number of records to be retrieved
#' @return character vector with one request path per page
#'   (\code{"<path>&page=<n>"}); empty when \code{hitCount} is 0
rebi_pageing <- function(path, hitCount) {
  # a partly filled last page still needs its own request
  n_pages <- ceiling(hitCount / 25)
  # seq_len() yields no pages for zero hits, whereas 1:0 would give c(1, 0)
  sprintf("%s&page=%s", path, seq_len(n_pages))
}
#' Check for API availability
#'
#' @param req response object with a \code{status_code} element
#' @return invisible \code{NULL} when the status code is below 400;
#'   otherwise an error carrying the HTTP status message is raised
rebi_check <- function(req) {
  if (req$status_code < 400) {
    return(invisible())
  }
  # the status message is looked up for `req`, the function's own argument
  stop(httr::http_status(req)$message, "\n", call. = FALSE)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment