padpadpadpad · July 13, 2023 06:53
diff --git a/weighted_bootstrap_many_curves.R b/weighted_bootstrap_many_curves.R
 # try weighted bootstrapping of many curves
 # combines approach from weighted bootstrapping and bootstrapping many curves
 # https://padpadpadpad.github.io/rTPC/articles/bootstrapping_many_curves.html
 # https://padpadpadpad.github.io/rTPC/articles/weighted_bootstrapping.html

 # load in packages
 librarian::shelf(tidyverse, rTPC, nls.multstart, broom, minpack.lm, car)

 # load in data
 data("chlorella_tpc")

 # create data by flux with standard deviations
 d <- group_by(chlorella_tpc, flux, temp) %>%
  summarise(sd = sd(rate),
            ave_rate = mean(rate),
            .groups = 'drop')

 # fit the curve to each model using nls.multstart
 d_fits <- nest(d, data = c(ave_rate, temp, sd)) %>%
  mutate(gaussian = map(data, ~nls_multstart(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
                                             data = .x,
                                             iter = c(3,3,3),
                                             start_lower = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') - 1,
                                             start_upper = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') + 1,
                                             lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
                                             upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
                                             supp_errors = 'Y',
                                             convergence_count = FALSE,
                                             # include weights here!
                                             modelweights = 1/sd)))

 # check they work
 d_fits$gaussian[[1]]
 d_fits$gaussian[[2]]

 # create high resolution predictions
 d_preds <- mutate(d_fits, new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)))) %>%
  select(., -data) %>%
  mutate(preds = map2(gaussian, new_data, ~augment(.x, newdata = .y))) %>%
  select(flux, preds) %>%
  unnest(preds)

 # show the data
 ggplot(d, aes(temp, ave_rate)) +
  geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
  geom_point(size = 2) +
  geom_line(aes(temp, .fitted), d_preds) +
  theme_bw(base_size = 12) +
  labs(x = 'Temperature (ºC)',
       y = 'Metabolic rate',
       title = 'Metabolic rate across temperatures') +
  facet_wrap(~flux)

 # refit with nlsLM - THIS IS SUPER IMPORTANT

 # get coefs for the nls.multstart to pass as start values
 d_fits <- mutate(d_fits, coefs = map(gaussian, coef))

 # fit with nlsLM instead
 d_fits <- mutate(d_fits, nls_fit = map2(data, coefs, ~nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
                                                            data = .x,
                                                            start = .y,
                                                            lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
                                                            upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
                                                            # add in weights here
                                                            weight = 1/sd)))

 # try do bootstrapping usind the for loop approach

 # create empty list column
 d_fits <- mutate(d_fits, bootstrap = list(rep(NA, n())))

 # run for loop to bootstrap each refitted model
 for(i in 1:nrow(d_fits)){
  temp_data <- d_fits$data[[i]]
  temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
                    data = temp_data,
                    start = d_fits$coefs[[i]],
                    lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
                    upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
                    weights = 1/temp_data$sd)
  boot <- Boot(temp_fit, method = 'residual')
  d_fits$bootstrap[[i]] <- boot
  rm(list = c('temp_fit', 'temp_data', 'boot'))
 }

 d_fits

 # get the raw values of each bootstrap
 d_fits <- mutate(d_fits, output_boot = map(bootstrap, function(x) x$t))

 # calculate predictions with a gnarly written function
 d_fits <- mutate(d_fits, preds = map2(output_boot, data, function(x, y){
  temp <- as.data.frame(x) %>%
    drop_na() %>%
    mutate(iter = 1:n()) %>%
    group_by_all() %>%
    do(data.frame(temp = seq(min(y$temp), max(y$temp), length.out = 100))) %>%
    ungroup() %>%
    mutate(pred = gaussian_1987(temp, rmax, topt, a))
  return(temp)
 }))

 # select, unnest and calculate 95% CIs of predictions
 boot_conf_preds <- select(d_fits, flux, preds) %>%
  unnest(preds) %>%
  group_by(flux, temp) %>%
  summarise(conf_lower = quantile(pred, 0.025),
            conf_upper = quantile(pred, 0.975),
            .groups = 'drop')

 ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot_conf_preds, fill = 'blue', alpha = 0.3) +
  geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
  geom_point(aes(temp, ave_rate), d, size = 2) +
  theme_bw(base_size = 12) +
  labs(x = 'Temperature (ºC)',
       y = 'Rate') +
  facet_wrap(~flux)
                                           
 # get confidence intervals of fitted parameters
 d_fits <- mutate(d_fits, params = map(nls_fit, broom::tidy),
                 cis = map(bootstrap, function(x){
                   temp <- confint(x, method = 'bca') %>%
                     as.data.frame() %>%
                     rename(conf_lower = 1, conf_upper = 2) %>%
                     rownames_to_column(., var = 'term')
                   return(temp)
                 }))

 # join parameter and confidence intervals in the same dataset 
 left_join(select(d_fits, flux, params) %>% unnest(params),
          select(d_fits, flux, cis) %>% unnest(cis)) %>%
  ggplot(., aes(flux, estimate)) +
  geom_point(size = 4) +
  geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
  theme_bw() +
  facet_wrap(~term, scales = 'free')

 # run bootstrap for calc_params

 # create empty list column
 d_fits <- mutate(d_fits, ci_extra_params = list(rep(NA, n())))

 # run for loop to bootstrap extra params from each model
 for(i in 1:nrow(d_fits)){
  temp_data <- d_fits$data[[i]]
  temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
                    data = temp_data,
                    start = d_fits$coefs[[i]],
                    lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
                    upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'))
  boot <- Boot(temp_fit, f = function(x){unlist(calc_params(x))}, labels = names(calc_params(temp_fit)), R = 1000, method = 'case') %>%
    confint(., method = 'bca') %>%
    as.data.frame() %>%
    rename(conf_lower = 1, conf_upper = 2) %>%
    rownames_to_column(., var = 'param')
  d_fits$ci_extra_params[[i]] <- boot
  rm(list = c('temp_fit', 'temp_data', 'boot'))
 }

 # calculate extra params for each model and put in long format to begin with
 d_fits <- mutate(d_fits, extra_params = map(nls_fit, function(x){calc_params(x) %>% pivot_longer(everything(), names_to =  'param', values_to = 'estimate')}))

 left_join(select(d_fits, flux, extra_params) %>% unnest(extra_params),
          select(d_fits, flux, ci_extra_params) %>% unnest(ci_extra_params)) %>%
  ggplot(., aes(flux, estimate)) +
  geom_point(size = 4) +
  geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
  theme_bw() +
  labs(y = 'estimate', x = "curve id") +
  facet_wrap(~param, scales = 'free') +
  labs(title = 'Calculation of confidence intervals for extra parameters')
	# try weighted bootstrapping of many curves
	# combines approach from weighted bootstrapping and bootstrapping many curves
	# https://padpadpadpad.github.io/rTPC/articles/bootstrapping_many_curves.html
	# https://padpadpadpad.github.io/rTPC/articles/weighted_bootstrapping.html

	# load in packages
	librarian::shelf(tidyverse, rTPC, nls.multstart, broom, minpack.lm, car)

	# load in data
	data("chlorella_tpc")

	# create data by flux with standard deviations
	d <- group_by(chlorella_tpc, flux, temp) %>%
	summarise(sd = sd(rate),
	ave_rate = mean(rate),
	.groups = 'drop')

	# fit the curve to each model using nls.multstart
	d_fits <- nest(d, data = c(ave_rate, temp, sd)) %>%
	mutate(gaussian = map(data, ~nls_multstart(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
	data = .x,
	iter = c(3,3,3),
	start_lower = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') - 1,
	start_upper = get_start_vals(.x$temp, .x$ave_rate, model_name = 'gaussian_1987') + 1,
	lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
	upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
	supp_errors = 'Y',
	convergence_count = FALSE,
	# include weights here!
	modelweights = 1/sd)))

	# check they work
	d_fits$gaussian[[1]]
	d_fits$gaussian[[2]]

	# create high resolution predictions
	d_preds <- mutate(d_fits, new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)))) %>%
	select(., -data) %>%
	mutate(preds = map2(gaussian, new_data, ~augment(.x, newdata = .y))) %>%
	select(flux, preds) %>%
	unnest(preds)

	# show the data
	ggplot(d, aes(temp, ave_rate)) +
	geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
	geom_point(size = 2) +
	geom_line(aes(temp, .fitted), d_preds) +
	theme_bw(base_size = 12) +
	labs(x = 'Temperature (ºC)',
	y = 'Metabolic rate',
	title = 'Metabolic rate across temperatures') +
	facet_wrap(~flux)

	# refit with nlsLM - THIS IS SUPER IMPORTANT

	# get coefs for the nls.multstart to pass as start values
	d_fits <- mutate(d_fits, coefs = map(gaussian, coef))

	# fit with nlsLM instead
	d_fits <- mutate(d_fits, nls_fit = map2(data, coefs, ~nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
	data = .x,
	start = .y,
	lower = get_lower_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
	upper = get_upper_lims(.x$temp, .x$ave_rate, model_name = 'gaussian_1987'),
	# add in weights here
	weight = 1/sd)))

	# try do bootstrapping usind the for loop approach

	# create empty list column
	d_fits <- mutate(d_fits, bootstrap = list(rep(NA, n())))

	# run for loop to bootstrap each refitted model
	for(i in 1:nrow(d_fits)){
	temp_data <- d_fits$data[[i]]
	temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
	data = temp_data,
	start = d_fits$coefs[[i]],
	lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
	upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
	weights = 1/temp_data$sd)
	boot <- Boot(temp_fit, method = 'residual')
	d_fits$bootstrap[[i]] <- boot
	rm(list = c('temp_fit', 'temp_data', 'boot'))
	}

	d_fits

	# get the raw values of each bootstrap
	d_fits <- mutate(d_fits, output_boot = map(bootstrap, function(x) x$t))

	# calculate predictions with a gnarly written function
	d_fits <- mutate(d_fits, preds = map2(output_boot, data, function(x, y){
	temp <- as.data.frame(x) %>%
	drop_na() %>%
	mutate(iter = 1:n()) %>%
	group_by_all() %>%
	do(data.frame(temp = seq(min(y$temp), max(y$temp), length.out = 100))) %>%
	ungroup() %>%
	mutate(pred = gaussian_1987(temp, rmax, topt, a))
	return(temp)
	}))

	# select, unnest and calculate 95% CIs of predictions
	boot_conf_preds <- select(d_fits, flux, preds) %>%
	unnest(preds) %>%
	group_by(flux, temp) %>%
	summarise(conf_lower = quantile(pred, 0.025),
	conf_upper = quantile(pred, 0.975),
	.groups = 'drop')

	ggplot() +
	geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
	geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot_conf_preds, fill = 'blue', alpha = 0.3) +
	geom_linerange(aes(x = temp, ymin = ave_rate - sd, ymax = ave_rate + sd), d) +
	geom_point(aes(temp, ave_rate), d, size = 2) +
	theme_bw(base_size = 12) +
	labs(x = 'Temperature (ºC)',
	y = 'Rate') +
	facet_wrap(~flux)

	# get confidence intervals of fitted parameters
	d_fits <- mutate(d_fits, params = map(nls_fit, broom::tidy),
	cis = map(bootstrap, function(x){
	temp <- confint(x, method = 'bca') %>%
	as.data.frame() %>%
	rename(conf_lower = 1, conf_upper = 2) %>%
	rownames_to_column(., var = 'term')
	return(temp)
	}))

	# join parameter and confidence intervals in the same dataset
	left_join(select(d_fits, flux, params) %>% unnest(params),
	select(d_fits, flux, cis) %>% unnest(cis)) %>%
	ggplot(., aes(flux, estimate)) +
	geom_point(size = 4) +
	geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
	theme_bw() +
	facet_wrap(~term, scales = 'free')

	# run bootstrap for calc_params

	# create empty list column
	d_fits <- mutate(d_fits, ci_extra_params = list(rep(NA, n())))

	# run for loop to bootstrap extra params from each model
	for(i in 1:nrow(d_fits)){
	temp_data <- d_fits$data[[i]]
	temp_fit <- nlsLM(ave_rate ~ gaussian_1987(temp, rmax, topt, a),
	data = temp_data,
	start = d_fits$coefs[[i]],
	lower = get_lower_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'),
	upper = get_upper_lims(temp_data$temp, temp_data$ave_rate, model_name = 'gaussian_1987'))
	boot <- Boot(temp_fit, f = function(x){unlist(calc_params(x))}, labels = names(calc_params(temp_fit)), R = 1000, method = 'case') %>%
	confint(., method = 'bca') %>%
	as.data.frame() %>%
	rename(conf_lower = 1, conf_upper = 2) %>%
	rownames_to_column(., var = 'param')
	d_fits$ci_extra_params[[i]] <- boot
	rm(list = c('temp_fit', 'temp_data', 'boot'))
	}

	# calculate extra params for each model and put in long format to begin with
	d_fits <- mutate(d_fits, extra_params = map(nls_fit, function(x){calc_params(x) %>% pivot_longer(everything(), names_to = 'param', values_to = 'estimate')}))

	left_join(select(d_fits, flux, extra_params) %>% unnest(extra_params),
	select(d_fits, flux, ci_extra_params) %>% unnest(ci_extra_params)) %>%
	ggplot(., aes(flux, estimate)) +
	geom_point(size = 4) +
	geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
	theme_bw() +
	labs(y = 'estimate', x = "curve id") +
	facet_wrap(~param, scales = 'free') +
	labs(title = 'Calculation of confidence intervals for extra parameters')