Skip to content

Instantly share code, notes, and snippets.

@tysonwepprich
Created March 1, 2019 15:52
Show Gist options
  • Save tysonwepprich/748fc05708895a4dcdab2494e61e3238 to your computer and use it in GitHub Desktop.
Save tysonwepprich/748fc05708895a4dcdab2494e61e3238 to your computer and use it in GitHub Desktop.
Reanalysis of monarch trends reported in Boyle et al. 2019
# Start with all Lepidoptera museum records from
# Boyle, J., H. Dalgleish, and J. Puzey. 2019a.
# Data from: Monarch butterfly and milkweed declines substantially predate
# the use of genetically modified crops. Dryad Digital Repository.
# https://datadryad.org/resource/doi:10.5061/dryad.sk37gd2
# these data were downloaded, cleaned, and saved with code provided in Dryad:
# PART 1.1 Filtering museum data.R
lep <- readRDS("lep_records.rds")
library(dplyr)
library(tidyr)
library(ggplot2)
# devtools::install_github("kassambara/ggpubr")
library(ggpubr) # for combining plots
'%!in%' <- function(x,y)!('%in%'(x,y))
# count of all specimens over time
df <- lep %>%
group_by(year) %>%
count()
plot(df)
# Common Lepidoptera families only
fams <- lep %>%
group_by(family) %>%
count() %>%
filter(n > 1000)
# All specimens by year and family
df <- lep %>%
filter(family %in% fams$family) %>%
group_by(year, family) %>%
count()
ggplot(df, aes(x = year, y = n)) +
geom_point() +
facet_wrap(~family) +
theme_bw()
# Butterfly familys by year, including defunct names
df <- lep %>%
filter(family %in% c("Hedylidae", "Hesperiidae", "Danaidae", "Heliconiidae", "Satyridae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae", "Riodinidae")) %>%
group_by(year, family) %>%
count()
ggplot(df, aes(x = year, y = n)) +
geom_point() +
facet_wrap(~family) +
theme_bw()
#
# df <- lep %>%
# filter(family %in% c("Hesperiidae", "Danaidae", "Heliconiidae", "Satyridae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae", "Riodinidae")) %>%
# group_by(year) %>%
# count()
#
# ggplot(df, aes(x = year, y = n)) +
# geom_point() +
# theme_bw()
# check numbers in lep
# assume if individualCount == NA, best guess is one
lep$individualCount[which(is.na(lep$individualCount))] <- 1
# reassign one row with ridiculous high number
lep$individualCount[which(lep$individualCount > 99999)] <- 1
# regions used in study
# all others in Eastern USA
west = c("washington", "oregon", "california",
"nevada", "idaho", "utah", "arizona")
exclude = c("alaska", "hawaii")
# as in study, using Eastern population
# this included 1193 records (1191 in study), didn't remove spatial outliers
mon_raw <- lep %>%
filter(genus == "Danaus" & specificEpithet == "plexippus") %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
group_by(region, year) %>%
count()
# results similar if using the sum of reported individuals collected
# i.e. summarise(n = sum(individualCount))
# as in Bartomeus et al 2013 methods, using each recording trip as a count of 1
# assuming that recordedBy x eventDate gives a unique recording trip
mon_trip <- lep %>%
filter(genus == "Danaus" & specificEpithet == "plexippus") %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
group_by(region, year, eventDate, recordedBy) %>%
count() %>%
group_by(region, year) %>%
count() %>%
rename(n = nn)
# standardization by number of other records
# by all Lepidoptera in same region as in study
zlep_raw <- lep %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
group_by(year) %>%
count() %>%
# summarise(n = sum(individualCount)) %>%
rename(alln = n) %>%
mutate(taxon = "Lepidoptera")
# just moths
zmoth_raw <- lep %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
filter(family %!in% c("Hesperiidae", "Danaidae", "Heliconiidae", "Satyridae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae", "Riodinidae")) %>%
group_by(year) %>%
count() %>%
# summarise(n = sum(individualCount)) %>%
rename(alln = n) %>%
mutate(taxon = "Moths")
# by all butterflies in same region as in study
zbfly_raw <- lep %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
filter(family %in% c("Hesperiidae", "Danaidae", "Heliconiidae", "Satyridae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae", "Riodinidae")) %>%
group_by(year) %>%
count() %>%
# summarise(n = sum(individualCount)) %>%
rename(alln = n) %>%
mutate(taxon = "Butterflies")
# by all Lepidoptera in same region as in study
# but using count of 1 per recording trip: presence/absence
zlep_trip <- lep %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
group_by(region, year, eventDate, recordedBy) %>%
count() %>%
group_by(year) %>%
count() %>%
rename(alln = nn) %>%
mutate(taxon = "Lepidoptera")
# by all butterflies in same region as in study
# but using count of 1 per recording trip: presence/absence
zbfly_trip <- lep %>%
mutate(region = case_when(stateProvince %in% west ~ "west",
stateProvince %in% exclude ~ "excl",
TRUE ~ "east")) %>%
filter(region == "east") %>%
filter(family %in% c("Hesperiidae", "Danaidae", "Heliconiidae", "Satyridae", "Lycaenidae", "Nymphalidae", "Papilionidae", "Pieridae", "Riodinidae")) %>%
group_by(region, year, eventDate, recordedBy) %>%
count() %>%
group_by(year) %>%
count() %>%
rename(alln = nn) %>%
mutate(taxon = "Butterflies")
# Figures
theme_set(theme_bw(base_size = 18))
# monarch records
# add zeros to missing years
mon_raw <- mon_raw %>% mutate(taxon = "Eastern N.A. Monarch")
missing <- anti_join(data.frame(year = full_seq(mon_raw$year, 1), region = "east"), mon_raw) %>%
mutate(n = 0)
mon_raw <- bind_rows(mon_raw, missing)
mon_raw$taxon[is.na(mon_raw$taxon)] <- "Zeros"
p1 <- ggplot(mon_raw, aes(x = year, y = n, group = taxon, color = taxon, shape = taxon)) +
scale_shape_manual(values=c(16, 21)) +
scale_color_grey(start = .01, end = .02) +
geom_point() +
xlab("Year") +
ylab("Number of records") +
theme(legend.position = c(.25, .85), legend.title = element_blank())
p1
# Lepidoptera vs moth vs butterfly records
all <- bind_rows(zbfly_raw, zlep_raw, zmoth_raw)
all$taxon <- factor(all$taxon, levels=c("Lepidoptera", "Moths", "Butterflies"))
p2 <- ggplot(all, aes(x = year, y = alln, group = taxon, color = taxon)) +
scale_color_brewer(type = "qual", palette = "Dark2") +
geom_point(alpha = .7) +
geom_smooth(method = "gam", se = FALSE, formula = y ~ s(x, bs = "cr", k = 25), method.args = list(family = "poisson(link = log)")) +
xlab("Year") +
ylab("Number of records") +
theme(legend.position = c(.2, .85), legend.title = element_blank())
p2
ratio <- all %>%
group_by(year) %>%
summarise(ratio = alln[taxon == "Butterflies"] / alln[taxon == "Lepidoptera"])
p3 <- ggplot(ratio, aes(x = year, y = ratio)) +
scale_color_brewer(type = "qual", palette = "Dark2") +
geom_point(alpha = .7) +
geom_smooth(color = "black") +
# geom_smooth(method = "gam", formula = y ~ s(x, bs = "cr", k = 25), method.args = list(family = "poisson(link = log)")) +
xlab("Year") +
ylab("Butterfly records /\nLepidoptera records") +
theme(legend.position = c(.2, .85), legend.title = element_blank())
p3
# standardize by all leps as in study
# removing 2 outlier years to match study
# this is very close to Figure 1, but some points slightly off
df <- left_join(mon_raw, zlep_raw, by = c("year")) %>%
filter(year %!in% c(1930, 1931))
p4 <- ggplot(df, aes(x = year, y = n / alln)) +
geom_point(color = "#1b9e77", alpha = .7) +
geom_smooth(color = "#1b9e77") +
xlab("Year") +
ylab("Monarch records /\nLepidoptera records") +
theme(legend.position = c(.15, .9), legend.title = element_blank())
p4
# standardize by butterflies, also with outlier years removed
df <- left_join(mon_raw, zbfly_raw, by = c("year")) %>%
filter(year %!in% c(1930, 1931))
p5 <- ggplot(df, aes(x = year, y = n / alln)) +
scale_color_brewer(type = "qual", palette = "Dark2") +
geom_point(color = "#7570b3", alpha = .7) +
geom_smooth(color = "#7570b3") +
xlab("Year") +
ylab("Monarch records /\nButterfly records") +
theme(legend.position = c(.15, .9), legend.title = element_blank())
p5
# what about Bartomeus method?
# although they use a binomial GLM, I use LOESS to match study and allow nonlinearity
# no outliers if each trip = one presence
missing <- anti_join(data.frame(year = full_seq(mon_trip$year, 1), region = "east"), mon_trip) %>%
mutate(n = 0)
mon_trip <- bind_rows(mon_trip, missing)
df <- left_join(mon_trip, zlep_trip, by = c("year"))
# Standardize by recording trip, all Lepidoptera
p6 <- ggplot(df, aes(x = year, y = n / alln)) +
scale_color_brewer(type = "qual", palette = "Dark2") +
geom_point(color = "#d95f02") +
geom_smooth(color = "#d95f02") +
xlab("Year") +
ylab("Monarch presences /\nLepidoptera trips") +
theme(legend.position = c(.15, .9), legend.title = element_blank())
p6
# Standardize by recording trip, butterflies only
df <- left_join(mon_trip, zbfly_trip, by = c("year"))
p7 <- ggplot(df, aes(x = year, y = n / alln)) +
scale_color_brewer(type = "qual", palette = "Dark2") +
geom_point(color = "#1b9e77") +
geom_smooth(color = "#1b9e77") +
xlab("Year") +
ylab("Monarch presences /\nButterfly trips") +
theme(legend.position = c(.15, .9), legend.title = element_blank())
p7
# Combine figures, updated
p8 <- ggarrange(p4 + rremove("xlab"), p5 + rremove("xlab"), p2, p3,
labels = c("A", "B", "C", "D"),
ncol = 2, nrow = 2, align = "v")
p8
ggsave(plot = p8, filename = "monarchs2.jpg", device = "jpg", width = 12, height = 10.5, units = "in")
ggsave(plot = p4, filename = "monarchsA.jpg", device = "jpg", width = 6, height = 5.25, units = "in")
ggsave(plot = p5, filename = "monarchsB.jpg", device = "jpg", width = 6, height = 5.25, units = "in")
ggsave(plot = p2, filename = "monarchsC.jpg", device = "jpg", width = 6, height = 5.25, units = "in")
ggsave(plot = p3, filename = "monarchsD.jpg", device = "jpg", width = 6, height = 5.25, units = "in")
# alternative with binomial GAM, similar to Bartomeus GLM but allows nonlinear fits
# can change k to adjust wiggliness of curve fit to data
library(mgcv)
df <- left_join(mon_trip, zbfly_trip, by = c("year"))
# df <- left_join(mon_raw, zbfly_raw, by = c("year"))
# df <- left_join(mon_raw, zlep_raw, by = c("year"))
mod <- gam(cbind(n, alln) ~ s(year, k = 10), family = binomial(link = "logit"),
data = df)
plot(mod, residuals = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment