Last active
November 6, 2023 04:58
-
-
Save mathzero/0fb271725da8ff54de0508f15eba0c64 to your computer and use it in GitHub Desktop.
This script runs a literature search on PubMed programmatically, then pulls article metadata on citations and altmetrics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
install.packages("pubmedR") | |
install.packages("rcrossref") | |
devtools::install_github("ropensci/rAltmetric") | |
install.packages("tidyverse") | |
install.packages("janitor") | |
library(pubmedR) | |
library(rcrossref) | |
library(tidyverse) | |
library(janitor) | |
library(rAltmetric) | |
library(curl) | |
library(readr) | |
# PubMed literature search -------------------------------------------------
# NCBI API key, passed to the pubmedR calls below
#' How to obtain a key:
#' https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/
#' Account registration: https://account.ncbi.nlm.nih.gov/
api_key <- "INSERT_YOUR_API_KEY_HERE"
# Search expression (field tags in square brackets)
query <- "COVID-19*[Title/Abstract] OR SARS-CoV-2*[Title/Abstract]
AND symptoms*[Title/Abstract] AND persistent*[Title/Abstract] OR
long COVID*[Title/Abstract] OR post-COVID syndrome*[Title/Abstract]
AND english[LA] AND Journal Article[PT] AND 2020[DP]"
# Ask how many records match the query, and show the number
res <- pmQueryTotalCount(query = query, api_key = api_key)
res$total_count
# Request every matching record (limit is set to the total count)
docs <- pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
# Convert the API response to a data frame and clean up the column names
df <- janitor::clean_names(pmApi2df(docs))
# Create local bibtex file ------------------------------------------------------
# Turn DOIs into resolver URLs; rows without a DOI are skipped.
# sprintf() returns character(0) when there are no DOIs, whereas
# paste0("https://doi.org/", character(0)) would return one bogus URL.
urls <- sprintf("https://doi.org/%s", df$di[!is.na(df$di)])
# Handle that sends an Accept header asking for BibTeX output
h <- new_handle()
handle_setheaders(h, "accept" = "application/x-bibtex")
# Iterate through the URLs, pull the bib info and append it to a local file.
# Only the first 10 URLs are used, to save time on the demo; head() avoids the
# NA entries that urls[1:10] produces when fewer than 10 URLs are available.
# NOTE: the file is written in append mode, so re-running this section adds
# the same entries again.
walk(head(urls, 10), function(url) {
  con <- curl(url, handle = h)
  # Release this connection even if the download fails, rather than calling
  # closeAllConnections(), which also closes connections owned by other code.
  on.exit(close(con), add = TRUE)
  bib <- readLines(con, warn = FALSE)
  write(bib, file = "mybibfile.bib", append = TRUE)
})
# Read the file back for a quick visual check; this does not modify the file
read_delim("mybibfile.bib", delim = "\n")
# Query crossref for citation data ----------------------------------------
# Get citation counts for every entry of df$di (NA entries are passed as-is)
# NOTE(review): the `key` value below looks like an e-mail address that was
# obfuscated when this script was published; presumably it must be replaced
# with your own Crossref-registered e-mail before running. Confirm.
cites_df <- cr_citation_count(
  df$di,
  url = "http://www.crossref.org/openurl/",
  key = "[email protected]",
  async = FALSE
)
# Join by position. cbind() can silently recycle a shorter data frame when the
# row counts divide evenly, so require exactly one result row per article.
stopifnot(nrow(cites_df) == nrow(df))
df <- cbind(df, cites_df)
df <- df %>% rename(citations_count = count)
# Add altmetrics data -----------------------------------------------------

#' Query the Altmetric v1 API for a single DOI
#'
#' @param doi A single, non-missing DOI string.
#' @param apikey Optional API key. When supplied it is sent as the `key`
#'   query parameter; when `NULL` no key is sent.
#' @param ... Currently unused; accepted so existing calls keep working.
#' @return The parsed JSON response, flattened with `rlist::list.flatten()`
#'   and given class "altmetric".
#' @details Stops with "No metrics found for object" on HTTP 404. Other HTTP
#'   error statuses only raise a warning before the body is parsed.
altmetrics_new <- function(doi = NULL, apikey = NULL, ...) {
  # Fail early instead of requesting the bare ".../doi/" endpoint
  if (!is.character(doi) || length(doi) != 1L || is.na(doi)) {
    stop("`doi` must be a single non-missing string")
  }
  base_url <- "https://api.altmetric.com/v1/"
  # Previously the key was put in a list that was never passed to the request
  query <- if (is.null(apikey)) NULL else list(key = apikey)
  request <- httr::GET(paste0(base_url, "doi/", doi), query = query)
  if (httr::status_code(request) == 404) {
    stop("No metrics found for object")
  }
  httr::warn_for_status(request)
  # An explicit encoding avoids httr's "No encoding supplied" message
  results <- jsonlite::fromJSON(
    httr::content(request, as = "text", encoding = "UTF-8"),
    flatten = TRUE
  )
  results <- rlist::list.flatten(results)
  class(results) <- "altmetric"
  results
}
### Batch altmetrics query function
#' Fetch Altmetric data for one DOI, returning NULL on any failure
#'
#' @param x A DOI string.
#' @param apikey API key passed on to `altmetrics_new()`.
#' @return The result of `altmetric_data()` for the DOI, or `NULL` if the
#'   lookup or the conversion raised an error.
alm <- function(x, apikey = "37c9ae22b7979124ea650f3412255bf9") {
  # Errors and warnings are deliberately swallowed so that one failing DOI
  # does not abort a whole batch
  out <- suppressWarnings(
    try(altmetric_data(altmetrics_new(doi = x, apikey = apikey)), silent = TRUE)
  )
  # inherits() also works for results with several classes, where
  # class(out) == "try-error" would give a condition of length > 1
  if (inherits(out, "try-error")) {
    return(NULL)
  }
  out
}
### Get doi list
# Missing DOIs are dropped; duplicates are removed so each DOI is queried once
# and a repeated DOI does not add extra matching rows to the join below
dois <- unique(df$di[!is.na(df$di)]) %>% as.list()
# pull altmetrics (alm() returns NULL for a DOI whose lookup failed)
alt_df <- map_df(dois, alm)
# get rid of all these authors! Keep the first two "authors" columns only
nms <- grep("authors", x = colnames(alt_df), value = TRUE)
# tail(nms, -2) is empty when there are two or fewer matches, whereas
# nms[3:length(nms)] would count backwards and pick the wrong entries
nms <- tail(nms, -2)
if (length(nms) > 0) {
  # all_of() makes it explicit that nms is a character vector of column names
  alt_df <- alt_df %>% select(-all_of(nms))
}
# join with original data
df <- df %>% left_join(alt_df, by = c("di" = "doi"))
# impute the title: where `title` is missing, fall back to the sentence-cased
# `ti` column (presumably `title` comes from the altmetrics data; confirm)
df$ti <- stringr::str_to_sentence(df$ti)
df$title[is.na(df$title)] <- df$ti[is.na(df$title)]
# score to numeric
df$score <- as.numeric(df$score)
# Summary top papers ------------------------------------------------------
# Ten papers with the highest citation counts
df %>%
  select(title, journal, di, authors1, citations_count) %>%
  arrange(desc(citations_count)) %>%
  slice_head(n = 10)
# Ten papers with the highest altmetrics score
df %>%
  select(title, journal, di, authors1, score) %>%
  arrange(desc(score)) %>%
  slice_head(n = 10)
Thanks for sharing your code @mathzero. May I ask what handle configuration you use for your curl call on line 49?
Hi @Gabrielle-p – apologies I left out a couple of lines. I have now added these in (with thanks to @jsgro, who posted the original code that I adapted for this https://github.com/jsgro/learnR/blob/e5d84d1a25321977a6ba90727297999d53794973/misc/createBibFromDOIs.txt#L18)
Great, thank you @mathzero !
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for sharing your code @mathzero. May I ask what handle configuration you use for your curl call on line 49?