Skip to content

Instantly share code, notes, and snippets.

@FlukeAndFeather
Created January 2, 2025 22:52
Show Gist options
  • Save FlukeAndFeather/8362731ffdbe2df802f4bd45dbb62c7d to your computer and use it in GitHub Desktop.
Save FlukeAndFeather/8362731ffdbe2df802f4bd45dbb62c7d to your computer and use it in GitHub Desktop.
Query Movebank for taxonomic coverage
library(move2)
library(ritis)
library(sf)
library(tidyverse)
# Pull study-level metadata from Movebank
movebank_studies <- movebank_download_study_info(
  # Restrict to studies whose data we can *see*; being visible does not
  # necessarily mean the data can be downloaded
  i_can_see_data = TRUE,
  # Study attributes to request. The attribute names are described at
  # https://github.com/movebank/movebank-api-doc/blob/master/movebank-api.md
  attributes = c(
    "name",
    "id",
    "taxon_ids",        # comma-separated when a study has several
    "sensor_type_ids",  # comma-separated when a study has several
    "number_of_deployments",
    "timestamp_first_deployed_location",
    "timestamp_last_deployed_location",
    "main_location_lat",
    "main_location_long",
    "license_type",
    "i_have_download_access"
  )
)
# Reshape to long format: one row per study x taxon x sensor type. A study
# that lists several taxon_ids and/or sensor_type_ids (comma-separated) is
# expanded into several rows; studies with no taxon are discarded.
movebank_long <- movebank_studies %>%
  # Keep only the identifying columns, switching to singular column names
  select(
    name,
    id,
    taxon_id = taxon_ids,
    sensor_type_id = sensor_type_ids
  ) %>%
  drop_na(taxon_id) %>%
  # Split one column at a time so every taxon is paired with every sensor type
  separate_longer_delim(taxon_id, delim = ",") %>%
  separate_longer_delim(sensor_type_id, delim = ",")
# Utility functions
# Wrapper around ritis::itis_search() that looks up a scientific name in ITIS
# and returns its ranked taxonomic hierarchy (the `hierarchySoFarWRanks`
# field of the search result).
#
# taxon: a single scientific name; it is inserted verbatim into an ITIS regex
#   query on the `nameWOInd` field.
# Returns: always a length-1 character vector. `NA_character_` when ITIS has
#   no match for the name; the first hit when ITIS returns more than one.
#   Returning a scalar in every case keeps a `map_chr()` over many taxa from
#   aborting part-way through a long run of queries.
# NOTE(review): taking the first hit assumes the first match is the intended
#   taxon - confirm for names that are ambiguous in ITIS.
# Side note: goodness gracious the ITIS API is frickin' indecipherable
get_rank_hierarchy <- function(taxon) {
  hits <- itis_search(q = sprintf("nameWOInd:/%s/", taxon))
  # Flatten first so the field can be handled the same way whether it comes
  # back as a plain vector or wrapped in a list
  hierarchy <- unlist(hits[["hierarchySoFarWRanks"]], use.names = FALSE)
  if (length(hierarchy) == 0) {
    return(NA_character_)
  }
  as.character(hierarchy[[1]])
}
# Pull the name at one specific rank (class, order, family, etc.) out of the
# hierarchy string returned by ITIS.
#
# rank: rank label exactly as it appears in the string, e.g. "Class".
# hierarchy: character vector of hierarchy strings holding "<Rank>:<Name>"
#   entries.
# Returns: character vector the same length as `hierarchy` with the name that
#   follows "<rank>:", or NA where that rank is not present.
extract_rank <- function(rank, hierarchy) {
  # Spell the letter ranges out: `[A-z]` would also match the ASCII
  # punctuation that sits between "Z" and "a" ([ \ ] ^ _ `).
  rank_pattern <- sprintf("%s:([A-Za-z]+)", rank)
  str_extract(hierarchy, rank_pattern, group = 1)
}
# Query ITIS
# Fetch class, order, and family for every taxon.
# Note: every ITIS lookup is a separate web request and is slow. Taxa repeat
# across the study x sensor rows of movebank_long, so each distinct taxon is
# queried only once and the result is mapped back onto all rows that share it.
movebank_taxa <- movebank_long %>%
  # Keep only taxa that look like a binomial: two runs of letters separated
  # by a single space. The letter ranges are spelled out because `[A-z]`
  # would also match the ASCII punctuation between "Z" and "a".
  filter(str_detect(taxon_id, "[A-Za-z]+ [A-Za-z]+")) %>%
  mutate(
    rank_hierarchy = {
      unique_taxa <- unique(taxon_id)
      unique_hierarchies <- map_chr(unique_taxa, get_rank_hierarchy)
      # Expand the per-taxon results back to one value per row
      unique_hierarchies[match(taxon_id, unique_taxa)]
    },
    class = extract_rank("Class", rank_hierarchy),
    order = extract_rank("Order", rank_hierarchy),
    family = extract_rank("Family", rank_hierarchy)
  )
# A few summary tables
# Number of distinct studies per class, largest first.
# (Author's observation: about 3x more bird studies than mammal studies.)
movebank_taxa %>%
  # One row per class x study, so counting rows counts distinct studies
  distinct(class, id) %>%
  count(class, name = "n_studies", sort = TRUE)
# Number of distinct studies per order (within class), largest first.
# Author's observations:
#   Top 3 bird orders: Charadriiformes (shorebirds, gulls, auks),
#     Accipitriformes (hawks, eagles, kites), Anseriformes (ducks, geese,
#     swans).
#   Top 3 mammal orders: Carnivora (carnivores), Artiodactyla (even-toed
#     ungulates), Chiroptera (bats).
movebank_taxa %>%
  # One row per class x order x study, so counting rows counts distinct
  # studies
  distinct(class, order, id) %>%
  count(class, order, name = "n_studies", sort = TRUE)
# A few simple figures
# Line chart: how many studies began in each year, one colored line per class
left_join(
  movebank_taxa,
  select(movebank_studies, id, timestamp_first_deployed_location),
  by = "id"
) %>%
  # A study's start year is the year of its first deployed location
  mutate(first_year = lubridate::year(timestamp_first_deployed_location)) %>%
  # One row per class x year x study, so counting rows counts distinct studies
  distinct(class, first_year, id) %>%
  count(class, first_year, name = "n_studies") %>%
  # Discard groups where the class or the start year is unknown
  drop_na() %>%
  ggplot(aes(x = first_year, y = n_studies, color = class)) +
  geom_line() +
  xlim(2008, 2024) +
  theme_bw()
# Map of study locations over country outlines, colored by taxonomic class
movebank_taxa %>%
  # Attach each study's main location, naming the column `geom` up front
  left_join(
    select(movebank_studies, id, geom = main_location),
    by = "id"
  ) %>%
  st_as_sf() %>%
  ggplot() +
  # Country outlines as an unfilled base layer
  geom_sf(data = rnaturalearth::countries110, fill = NA) +
  # Study locations drawn on top as small, semi-transparent points
  geom_sf(aes(color = class), size = 0.5, alpha = 0.5) +
  theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment