Last active
July 13, 2023 06:53
-
-
Save padpadpadpad/80e2935e53e739e79d958c052a66ea55 to your computer and use it in GitHub Desktop.
Runs an example of doing weighted bootstrap regression on many curves using rTPC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# try weighted bootstrapping of many curves | |
# combines approach from weighted bootstrapping and bootstrapping many curves | |
# https://padpadpadpad.github.io/rTPC/articles/bootstrapping_many_curves.html | |
# https://padpadpadpad.github.io/rTPC/articles/weighted_bootstrapping.html | |
# load in packages | |
librarian::shelf(tidyverse, rTPC, nls.multstart, broom, minpack.lm, car) | |
# load in data | |
data("chlorella_tpc") | |
# create data by flux with standard deviations | |
d <- group_by(chlorella_tpc, flux, temp) %>% | |
summarise(sd = sd(rate), | |
ave_rate = mean(rate), | |
.groups = 'drop') | |
# fit the curve to each model using nls.multstart | |
d_fits <- nest(d, data = c(ave_rate, temp, sd)) %>% | |
mutate(gaussian = map(data, ~nls_multstart(ave_rate ~ gaussian_1987(temp, rmax, topt, a), | |
data = .x, | |
iter = c(3,3,3), | |
start_lower = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') - 1, | |
start_upper = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') + 1, | |
lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'), | |
upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'), | |
supp_errors = 'Y', | |
convergence_count = FALSE, | |
# include weights here! | |
modelweights = 1/sd))) | |
# check they work | |
d_fits$gaussian[[1]] | |
d_fits$gaussian[[2]] | |
# create high resolution predictions | |
d_preds <- mutate(d_fits, new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)))) %>% | |
select(., -data) %>% | |
mutate(preds = map2(gaussian, new_data, ~augment(.x, newdata = .y))) %>% | |
select(flux, preds) %>% | |
unnest(preds) | |
# show the data | |
ggplot(d, aes(temp, ave_rate)) + | |
geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) + | |
geom_point(size = 2) + | |
geom_line(aes(temp, .fitted), d_preds) + | |
theme_bw(base_size = 12) + | |
labs(x = 'Temperature (ºC)', | |
y = 'Metabolic rate', | |
title = 'Metabolic rate across temperatures') + | |
facet_wrap(~flux) | |
# refit with nlsLM - THIS IS SUPER IMPORTANT | |
# get coefs for the nls.multstart to pass as start values | |
d_fits <- mutate(d_fits, coefs = map(gaussian, coef)) | |
# fit with nlsLM instead | |
d_fits <- mutate(d_fits, nls_fit = map2(data, coefs, ~nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a), | |
data = .x, | |
start = .y, | |
lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'), | |
upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'), | |
# add in weights here | |
weight = 1/sd))) | |
# try do bootstrapping usind the for loop approach | |
# create empty list column | |
d_fits <- mutate(d_fits, bootstrap = list(rep(NA, n()))) | |
# run for loop to bootstrap each refitted model | |
for(i in 1:nrow(d_fits)){ | |
temp_data <- d_fits$data[[i]] | |
temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a), | |
data = temp_data, | |
start = d_fits$coefs[[i]], | |
lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'), | |
upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'), | |
weights = 1/temp_data$sd) | |
boot <- Boot(temp_fit, method = 'residual') | |
d_fits$bootstrap[[i]] <- boot | |
rm(list = c('temp_fit', 'temp_data', 'boot')) | |
} | |
d_fits | |
# get the raw values of each bootstrap | |
d_fits <- mutate(d_fits, output_boot = map(bootstrap, function(x) x$t)) | |
# calculate predictions with a gnarly written function | |
d_fits <- mutate(d_fits, preds = map2(output_boot, data, function(x, y){ | |
temp <- as.data.frame(x) %>% | |
drop_na() %>% | |
mutate(iter = 1:n()) %>% | |
group_by_all() %>% | |
do(data.frame(temp = seq(min(y$temp), max(y$temp), length.out = 100))) %>% | |
ungroup() %>% | |
mutate(pred = gaussian_1987(temp, rmax, topt, a)) | |
return(temp) | |
})) | |
# select, unnest and calculate 95% CIs of predictions | |
boot_conf_preds <- select(d_fits, flux, preds) %>% | |
unnest(preds) %>% | |
group_by(flux, temp) %>% | |
summarise(conf_lower = quantile(pred, 0.025), | |
conf_upper = quantile(pred, 0.975), | |
.groups = 'drop') | |
ggplot() + | |
geom_line(aes(temp, .fitted), d_preds, col = 'blue') + | |
geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot_conf_preds, fill = 'blue', alpha = 0.3) + | |
geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) + | |
geom_point(aes(temp, ave_rate), d, size = 2) + | |
theme_bw(base_size = 12) + | |
labs(x = 'Temperature (ºC)', | |
y = 'Rate') + | |
facet_wrap(~flux) | |
# get confidence intervals of fitted parameters | |
d_fits <- mutate(d_fits, params = map(nls_fit, broom::tidy), | |
cis = map(bootstrap, function(x){ | |
temp <- confint(x, method = 'bca') %>% | |
as.data.frame() %>% | |
rename(conf_lower = 1, conf_upper = 2) %>% | |
rownames_to_column(., var = 'term') | |
return(temp) | |
})) | |
# join parameter and confidence intervals in the same dataset | |
left_join(select(d_fits, flux, params) %>% unnest(params), | |
select(d_fits, flux, cis) %>% unnest(cis)) %>% | |
ggplot(., aes(flux, estimate)) + | |
geom_point(size = 4) + | |
geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + | |
theme_bw() + | |
facet_wrap(~term, scales = 'free') | |
# run bootstrap for calc_params | |
# create empty list column | |
d_fits <- mutate(d_fits, ci_extra_params = list(rep(NA, n()))) | |
# run for loop to bootstrap extra params from each model | |
for(i in 1:nrow(d_fits)){ | |
temp_data <- d_fits$data[[i]] | |
temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a), | |
data = temp_data, | |
start = d_fits$coefs[[i]], | |
lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'), | |
upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987')) | |
boot <- Boot(temp_fit, f = function(x){unlist(calc_params(x))}, labels = names(calc_params(temp_fit)), R = 1000, method = 'case') %>% | |
confint(., method = 'bca') %>% | |
as.data.frame() %>% | |
rename(conf_lower = 1, conf_upper = 2) %>% | |
rownames_to_column(., var = 'param') | |
d_fits$ci_extra_params[[i]] <- boot | |
rm(list = c('temp_fit', 'temp_data', 'boot')) | |
} | |
# calculate extra params for each model and put in long format to begin with | |
d_fits <- mutate(d_fits, extra_params = map(nls_fit, function(x){calc_params(x) %>% pivot_longer(everything(), names_to = 'param', values_to = 'estimate')})) | |
left_join(select(d_fits, flux, extra_params) %>% unnest(extra_params), | |
select(d_fits, flux, ci_extra_params) %>% unnest(ci_extra_params)) %>% | |
ggplot(., aes(flux, estimate)) + | |
geom_point(size = 4) + | |
geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + | |
theme_bw() + | |
labs(y = 'estimate', x = "curve id") + | |
facet_wrap(~param, scales = 'free') + | |
labs(title = 'Calculation of confidence intervals for extra parameters') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment