Last active
September 28, 2023 13:49
-
-
Save padpadpadpad/daf1ac2995be6878e7e0b067f12387a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to extract the data/code accessibility statement from a paper.
# Try with BES journals first. The trick is in grabbing the correct node
# (whatever that is).

# One example article DOI per BES journal, used to exercise the scraper.
mee_url <- "https://doi.org/10.1111/2041-210X.13585"
functionalecology_url <- "https://doi.org/10.1111/1365-2435.14422"
animalecology_url <- "https://doi.org/10.1111/1365-2656.13983"
appliedecology_url <- "https://doi.org/10.1111/1365-2664.14474"
# Load packages ----
library(rvest)
library(httr)
# Scrape the data accessibility statement from a BES journal article page.
#
# Arguments:
#   url  Article location, handed unchanged to rvest::read_html()
#        (for example a DOI link).
#
# Returns:
#   A character vector holding the cleaned text of every element whose id is
#   "openResearch". When the page contains no such element, a warning is
#   raised and a zero-length character vector is returned.
get_bes_data_statement <- function(url) {
  # read in URL
  page <- rvest::read_html(url)

  # grab open research statement from paper; calls are namespaced so the
  # function does not depend on rvest being attached
  nodes <- rvest::html_elements(page, xpath = '//*[@id="openResearch"]')
  statement <- rvest::html_text(nodes)

  # make a missing section visible instead of silently returning nothing
  if (length(statement) == 0L) {
    warning(
      "No element with id 'openResearch' was found on the page.",
      call. = FALSE
    )
    return(statement)
  }

  # clean it up - remove everything before (and including) the
  # "DATA AVAILABILITY STATEMENT" heading; text without that heading is left
  # as it is by this step
  statement <- gsub('.*DATA AVAILABILITY STATEMENT', '', statement)

  # drop newline characters, then surrounding whitespace
  statement <- gsub('\\n', '', statement)
  trimws(statement)
}
# Try the scraper on one article from each of several BES journals.
# Each call fetches the article page over the network.
get_bes_data_statement(mee_url)
get_bes_data_statement(functionalecology_url)
get_bes_data_statement(appliedecology_url)
get_bes_data_statement(animalecology_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment