Created
March 27, 2019 17:08
-
-
Save kgturner/f44a9dc6e3417794ed1f433a96a2cc7a to your computer and use it in GitHub Desktop.
Get large amounts of GBIF.org occurrence data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download occurrence data from GBIF - for few to many spp.
# 3/25/2019 KG Turner with assistance from S. Chamberlain, rOpenSci.org
# Register at gbif.org. You will need to use that email address, user name,
# and password in the following script.
# NB: DON'T COMMIT YOUR PASSWORDS.
# R version 3.5.3 (2019-03-11)
library(rgbif) # 1.2.0

#### make spp. list ####
smallList <- c("Chorispora tenella", "Centaurea diffusa", "Lupinus texensis")

# Resolve each name to its GBIF backbone taxon key, taking the first
# suggestion (this is how synonyms get mapped to accepted keys).
# NOTE(review): name_suggest() can return zero rows for an unmatched name,
# which makes $key[1] NA — check the result before building a request.
smallList_key <- sapply(smallList, function(x) name_suggest(x)$key[1], USE.NAMES = FALSE)
#### GBIF request for a single list (at most 100 spp.) ####
# You will need to register with GBIF to get a username etc.
# The first argument ORs together all taxon keys; the remaining unnamed
# arguments are ANDed as additional filters (see the predicate dump below).
occ_download(paste0("taxonKey = ", paste0(smallList_key, collapse = ",")),
             "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
             "hasCoordinate = true",
             "hasGeospatialIssue = false",
             user = "***",
             pwd = "***",
             email = "***"
)
### For the 3-species test list the call reported:
# <<gbif download>>
#   Username: ***
#   E-mail: ***
#   Download key: 0002607-190320150433242
# Check the status of the request, using the download key returned above.
# NOTE(review): the original passed key "0003753-190320150433242", which
# does not match the download key "0002607-190320150433242" reported by the
# request and by the metadata output below — corrected to the matching key.
occ_download_meta(key = "0002607-190320150433242")
# <<gbif download metadata>>
#   Status: PREPARING
#   Format: DWCA
#   Download key: 0002607-190320150433242
#   Created: 2019-03-25T20:15:22.624+0000
#   Modified: 2019-03-25T20:15:43.495+0000
#   Download link: http://api.gbif.org/v1/occurrence/download/request/0002607-190320150433242.zip
#   Total records: 1706
#   Request:
#     type: and
#     predicates:
#       > type: or
#         predicates:
#           - type: equals, key: TAXON_KEY, value: 3044349
#           - type: equals, key: TAXON_KEY, value: 3128962
#           - type: equals, key: TAXON_KEY, value: 2963880
#       > type: or
#         predicates:
#           - type: equals, key: BASIS_OF_RECORD, value: PRESERVED_SPECIMEN
#           - type: equals, key: BASIS_OF_RECORD, value: LITERATURE
#       > type: equals, key: HAS_COORDINATE, value: true
#       > type: equals, key: HAS_GEOSPATIAL_ISSUE, value: false
####For larger numbers of species...#### | |
#break up long species key lists into ~100 spp. lists | |
#GBIF restrictions: URL call limit 12K characters. Limit 3 requests at a time.
####large vector splitting function | |
library(plyr) #1.8.4 | |
#' Split a vector into chunks of (at most) n elements.
#'
#' Chunks start at positions 1, 1+n, 1+2n, ...; any trailing remainder that
#' would form a one-element chunk is merged into the final chunk, matching
#' the original behavior (e.g. length 10, n = 3 gives chunks of 3, 3, 4).
#'
#' Fixes two defects in the original: it crashed on a length-1 input
#' (`chunks` was empty, so `chunks[[0]] <-` errored) and on an empty input
#' (`seq(1, 0, by = n)` errors). Also drops the plyr dependency: base
#' lapply() does the same job as llply() here.
#'
#' @param d vector to split
#' @param n maximum chunk size
#' @return list of sub-vectors of d, in order, covering d exactly
plyrChunks <- function(d, n) {
  len <- length(d)
  if (len == 0L) {
    return(list())
  }
  starts <- seq(from = 1L, to = len, by = n)
  m <- length(starts)
  # A start landing exactly on the last element would make a one-element
  # trailing chunk; fold it into the previous chunk instead (original
  # behavior via the tail()-append dance).
  if (m > 1L && starts[m] == len) {
    starts <- starts[-m]
    m <- m - 1L
  }
  ends <- c(starts[-1L] - 1L, len)
  lapply(seq_len(m), function(i) d[starts[i]:ends[i]])
}
# Usage: plyrChunks(d = vector, n = size_of_chunks)
# Worked example: split 2030 dummy keys into chunks of up to 100.
d <- 1:2030
n <- 100
chunkList <- plyrChunks(d, n) # gives list of smaller vectors
# Real run: break a key vector of more than 300 spp. into smaller chunks.
# NOTE(review): bigList_key must be a long vector of GBIF taxon keys you
# built yourself (as smallList_key above); this overwrites the demo
# chunkList.
chunkList <- plyrChunks(bigList_key, 100) # ~100 spp. sub-lists for a really long species list
#### GBIF request queueing function for >300 spp. ####
# For fewer than 300 spp. it is faster to call them individually as above.
# GBIF will only accept three requests from a single user at a time;
# occ_download_queue() queues your requests and submits #4 when one of
# #1-3 is done.
# For 400-500 spp., specify occ_download() calls within occ_download_queue():
output <- occ_download_queue(
  occ_download('taxonKey = 3119195', "year = 1976",
               user = "***",
               pwd = "***",
               email = "***"),
  occ_download('taxonKey = 3119195', "year = 2001", "month <= 8",
               user = "***",
               pwd = "***",
               email = "***"),
  occ_download("country = NZ", "year = 1999", "month = 3",
               user = "***",
               pwd = "***",
               email = "***"),
  occ_download("catalogNumber = Bird.27847588", "year = 1998", "month = 2",
               user = "***",
               pwd = "***",
               email = "***")
)
# download the prepared data sets (one per queued request)
lapply(output, occ_download_get)
#### run gbif queueing for really large spp. list ####
# Loop through many occ_download() calls, one per chunk of taxon keys.
# Input: a list of short key vectors, i.e. chunkList above.
# BUGFIX(review): the original iterated chunkList[1:n], but n is the chunk
# SIZE (100), not the number of chunks — that padded the iteration with
# NULL elements. Iterate the list itself. Also keep every queue result in
# `results` instead of discarding all but the last.
results <- vector("list", length(chunkList))
for (idx in seq_along(chunkList)) {
  keys <- chunkList[[idx]]
  output <- occ_download_queue(
    occ_download(paste0("taxonKey = ", paste0(keys, collapse = ",")),
                 "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
                 "hasCoordinate = true",
                 "hasGeospatialIssue = false",
                 user = "***",
                 pwd = "***",
                 email = "***"
    ))
  print(output)
  results[[idx]] <- output
}
# `output` still holds the last queue's results (as before); `results`
# holds all of them if you need to inspect earlier chunks.
lapply(output, occ_download_meta)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment