padpadpadpad · July 9, 2019 10:28
diff --git a/roll_regression.R b/roll_regression.R
 # lets do some rolling regression

 # load in some packages
 library(tidyverse)
 library(nlsMicrobio)
 library(zoo)
 library(broom)

 # write a function to do the rolling regression
 # this will do an lm then clean up the data. it does not return the r.squared value but this would be easy to do
 rolling_coefs <- . %>% data.frame %>% lm() %>% broom::tidy()

 # write a different function that extracts elements from the lm object
 rolling_coefs2 <- function(x){
  temp <- data.frame(x)
  mod <- lm(temp)
  temp <- data.frame(slope = coef(mod)[[2]],
                     conf_lower = confint(mod)[2, ][[1]],
                     conf_upper = confint(mod)[2, ][[2]],
                     rsq = summary(mod)$r.squared, stringsAsFactors = FALSE)
  return(temp)
 }

 # load in some data
 x <- nlsMicrobio::growthcurve1 %>%
  mutate(curve = '1')
 y <- nlsMicrobio::growthcurve4 %>%
  mutate(curve = '2')
 z <- nlsMicrobio::growthcurve3 %>%
  mutate(curve = '3')

 d <- bind_rows(x, y, z)

 # so rolling regression needs ln od, not log10

 d <- mutate(d, ln_abund = log(10^LOG10N))

 # quick plot
 ggplot(d, aes(t, ln_abund, col = as.factor(curve))) +
  geom_point()

 # some pretty different curves here

 # run rolling regression on ln_abund

 # width controls the number of points the shifting window uses. 3 point regression (probably want a bit longer)

 models <- d %>%
  group_by(curve) %>%
  do(cbind(model = select(., ln_abund, t) %>% 
           zoo::rollapplyr(width = 3, rolling_coefs, by.column = FALSE, fill = NA),
           t = select(., t)))

 # alternative
 models2 <- d %>%
  group_by(curve) %>%
  do(cbind(model = select(., ln_abund, t) %>% 
             zoo::rollapplyr(width = 3, rolling_coefs2, by.column = FALSE, fill = NA),
           t = select(., t)))

 # can then pick one with highest value per curve
 # could also filter on p value r.squared etc
 d_growth_rate <- filter(models, model.term == 't') %>%
  mutate_at(., vars(starts_with('model')), as.numeric) %>%
  group_by(curve) %>%
  filter(., model.estimate == max(model.estimate, na.rm = TRUE)) %>%
  ungroup()

 d_growth_rate2 <- filter(models2, model.rsq > 0.95) %>%
  mutate_at(., vars(starts_with('model')), as.numeric) %>%
  group_by(curve) %>%
  filter(., model.slope == max(model.slope, na.rm = TRUE)) %>%
  ungroup()
	# lets do some rolling regression

	# load in some packages
	library(tidyverse)
	library(nlsMicrobio)
	library(zoo)
	library(broom)

	# write a function to do the rolling regression
	# this will do an lm then clean up the data. it does not return the r.squared value but this would be easy to do
	rolling_coefs <- . %>% data.frame %>% lm() %>% broom::tidy()

	# write a different function that extracts elements from the lm object
	rolling_coefs2 <- function(x){
	temp <- data.frame(x)
	mod <- lm(temp)
	temp <- data.frame(slope = coef(mod)[[2]],
	conf_lower = confint(mod)[2, ][[1]],
	conf_upper = confint(mod)[2, ][[2]],
	rsq = summary(mod)$r.squared, stringsAsFactors = FALSE)
	return(temp)
	}

	# load in some data
	x <- nlsMicrobio::growthcurve1 %>%
	mutate(curve = '1')
	y <- nlsMicrobio::growthcurve4 %>%
	mutate(curve = '2')
	z <- nlsMicrobio::growthcurve3 %>%
	mutate(curve = '3')

	d <- bind_rows(x, y, z)

	# so rolling regression needs ln od, not log10

	d <- mutate(d, ln_abund = log(10^LOG10N))

	# quick plot
	ggplot(d, aes(t, ln_abund, col = as.factor(curve))) +
	geom_point()

	# some pretty different curves here

	# run rolling regression on ln_abund

	# width controls the number of points the shifting window uses. 3 point regression (probably want a bit longer)

	models <- d %>%
	group_by(curve) %>%
	do(cbind(model = select(., ln_abund, t) %>%
	zoo::rollapplyr(width = 3, rolling_coefs, by.column = FALSE, fill = NA),
	t = select(., t)))

	# alternative
	models2 <- d %>%
	group_by(curve) %>%
	do(cbind(model = select(., ln_abund, t) %>%
	zoo::rollapplyr(width = 3, rolling_coefs2, by.column = FALSE, fill = NA),
	t = select(., t)))

	# can then pick one with highest value per curve
	# could also filter on p value r.squared etc
	d_growth_rate <- filter(models, model.term == 't') %>%
	mutate_at(., vars(starts_with('model')), as.numeric) %>%
	group_by(curve) %>%
	filter(., model.estimate == max(model.estimate, na.rm = TRUE)) %>%
	ungroup()

	d_growth_rate2 <- filter(models2, model.rsq > 0.95) %>%
	mutate_at(., vars(starts_with('model')), as.numeric) %>%
	group_by(curve) %>%
	filter(., model.slope == max(model.slope, na.rm = TRUE)) %>%
	ungroup()