Skip to content

Instantly share code, notes, and snippets.

@padpadpadpad
Last active July 13, 2023 06:53
Show Gist options
  • Save padpadpadpad/80e2935e53e739e79d958c052a66ea55 to your computer and use it in GitHub Desktop.
Save padpadpadpad/80e2935e53e739e79d958c052a66ea55 to your computer and use it in GitHub Desktop.
Runs an example of doing weighted bootstrap regression on many curves using rTPC
# try weighted bootstrapping of many curves
# combines approach from weighted bootstrapping and bootstrapping many curves
# https://padpadpadpad.github.io/rTPC/articles/bootstrapping_many_curves.html
# https://padpadpadpad.github.io/rTPC/articles/weighted_bootstrapping.html
# load in packages
librarian::shelf(tidyverse, rTPC, nls.multstart, broom, minpack.lm, car)
# load in data
data("chlorella_tpc")
# create data by flux with standard deviations
d <- group_by(chlorella_tpc, flux, temp) %>%
summarise(sd = sd(rate),
ave_rate = mean(rate),
.groups = 'drop')
# fit the curve to each model using nls.multstart
d_fits <- nest(d, data = c(ave_rate, temp, sd)) %>%
mutate(gaussian = map(data, ~nls_multstart(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
data = .x,
iter = c(3,3,3),
start_lower = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') - 1,
start_upper = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') + 1,
lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
supp_errors = 'Y',
convergence_count = FALSE,
# include weights here!
modelweights = 1/sd)))
# check they work
d_fits$gaussian[[1]]
d_fits$gaussian[[2]]
# create high resolution predictions
d_preds <- mutate(d_fits, new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)))) %>%
select(., -data) %>%
mutate(preds = map2(gaussian, new_data, ~augment(.x, newdata = .y))) %>%
select(flux, preds) %>%
unnest(preds)
# show the data
ggplot(d, aes(temp, ave_rate)) +
geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
geom_point(size = 2) +
geom_line(aes(temp, .fitted), d_preds) +
theme_bw(base_size = 12) +
labs(x = 'Temperature (ºC)',
y = 'Metabolic rate',
title = 'Metabolic rate across temperatures') +
facet_wrap(~flux)
# refit with nlsLM - THIS IS SUPER IMPORTANT
# get coefs for the nls.multstart to pass as start values
d_fits <- mutate(d_fits, coefs = map(gaussian, coef))
# fit with nlsLM instead
d_fits <- mutate(d_fits, nls_fit = map2(data, coefs, ~nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
data = .x,
start = .y,
lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
# add in weights here
weight = 1/sd)))
# try do bootstrapping usind the for loop approach
# create empty list column
d_fits <- mutate(d_fits, bootstrap = list(rep(NA, n())))
# run for loop to bootstrap each refitted model
for(i in 1:nrow(d_fits)){
temp_data <- d_fits$data[[i]]
temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
data = temp_data,
start = d_fits$coefs[[i]],
lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
weights = 1/temp_data$sd)
boot <- Boot(temp_fit, method = 'residual')
d_fits$bootstrap[[i]] <- boot
rm(list = c('temp_fit', 'temp_data', 'boot'))
}
d_fits
# get the raw values of each bootstrap
d_fits <- mutate(d_fits, output_boot = map(bootstrap, function(x) x$t))
# calculate predictions with a gnarly written function
d_fits <- mutate(d_fits, preds = map2(output_boot, data, function(x, y){
temp <- as.data.frame(x) %>%
drop_na() %>%
mutate(iter = 1:n()) %>%
group_by_all() %>%
do(data.frame(temp = seq(min(y$temp), max(y$temp), length.out = 100))) %>%
ungroup() %>%
mutate(pred = gaussian_1987(temp, rmax, topt, a))
return(temp)
}))
# select, unnest and calculate 95% CIs of predictions
boot_conf_preds <- select(d_fits, flux, preds) %>%
unnest(preds) %>%
group_by(flux, temp) %>%
summarise(conf_lower = quantile(pred, 0.025),
conf_upper = quantile(pred, 0.975),
.groups = 'drop')
ggplot() +
geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot_conf_preds, fill = 'blue', alpha = 0.3) +
geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
geom_point(aes(temp, ave_rate), d, size = 2) +
theme_bw(base_size = 12) +
labs(x = 'Temperature (ºC)',
y = 'Rate') +
facet_wrap(~flux)
# get confidence intervals of fitted parameters
d_fits <- mutate(d_fits, params = map(nls_fit, broom::tidy),
cis = map(bootstrap, function(x){
temp <- confint(x, method = 'bca') %>%
as.data.frame() %>%
rename(conf_lower = 1, conf_upper = 2) %>%
rownames_to_column(., var = 'term')
return(temp)
}))
# join parameter and confidence intervals in the same dataset
left_join(select(d_fits, flux, params) %>% unnest(params),
select(d_fits, flux, cis) %>% unnest(cis)) %>%
ggplot(., aes(flux, estimate)) +
geom_point(size = 4) +
geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
theme_bw() +
facet_wrap(~term, scales = 'free')
# run bootstrap for calc_params
# create empty list column
d_fits <- mutate(d_fits, ci_extra_params = list(rep(NA, n())))
# run for loop to bootstrap extra params from each model
for(i in 1:nrow(d_fits)){
temp_data <- d_fits$data[[i]]
temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
data = temp_data,
start = d_fits$coefs[[i]],
lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'))
boot <- Boot(temp_fit, f = function(x){unlist(calc_params(x))}, labels = names(calc_params(temp_fit)), R = 1000, method = 'case') %>%
confint(., method = 'bca') %>%
as.data.frame() %>%
rename(conf_lower = 1, conf_upper = 2) %>%
rownames_to_column(., var = 'param')
d_fits$ci_extra_params[[i]] <- boot
rm(list = c('temp_fit', 'temp_data', 'boot'))
}
# calculate extra params for each model and put in long format to begin with
d_fits <- mutate(d_fits, extra_params = map(nls_fit, function(x){calc_params(x) %>% pivot_longer(everything(), names_to = 'param', values_to = 'estimate')}))
left_join(select(d_fits, flux, extra_params) %>% unnest(extra_params),
select(d_fits, flux, ci_extra_params) %>% unnest(ci_extra_params)) %>%
ggplot(., aes(flux, estimate)) +
geom_point(size = 4) +
geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
theme_bw() +
labs(y = 'estimate', x = "curve id") +
facet_wrap(~param, scales = 'free') +
labs(title = 'Calculation of confidence intervals for extra parameters')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment