Last active
August 3, 2018 09:07
-
-
Save padpadpadpad/9cdce0318c38bd77c1311f9a37956511 to your computer and use it in GitHub Desktop.
code for cat
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # example for Cat | |
| # set seed | |
| set.seed(42) | |
| # load packages #### | |
| library(ggplot2) | |
| library(dplyr) | |
| library(tidyr) | |
| library(broom) | |
| library(purrr) | |
| library(segmented) | |
| library(patchwork) # if not there devtools::install_github('thomasp85/patchwork') | |
| # if first time running devtools::install_github('padpadpadpad/MicrobioUoE') | |
| # custom functions #### | |
| # function to clean up slope functions from segmented | |
| get_slopes <- function(seg_mod){ | |
| temp <- slope(seg_mod) | |
| temp1 <- temp$days %>% data.frame() | |
| temp1 <- tibble::rownames_to_column(temp1, var = 'slope_no') | |
| colnames(temp1) <- c('term', 'estimate', 'se', 't.value', 'conf.low', 'conf.high') | |
| return(temp1) | |
| } | |
| # function to extract slopes and breakpoint | |
| get_seg_lm_info <- function(seg_mod){ | |
| temp <- slope(seg_mod) | |
| temp1 <- temp$days %>% data.frame() | |
| temp1 <- tibble::rownames_to_column(temp1, var = 'slope_no') | |
| colnames(temp1) <- c('term', 'estimate', 'se', 't.value', 'conf.low', 'conf.high') | |
| bp <- tibble(term = 'break_point', estimate = seg_mod$psi[,2], se = seg_mod$psi[,3], t.value = NA, conf.low = NA, conf.high = NA) | |
| temp1 <- rbind(temp1, bp) | |
| return(temp1) | |
| } | |
| # load data - this needs changing #### | |
| test <- readRDS('~/Desktop/test100') | |
| # loop through lm and segmented fits for each ID #### | |
| control <- seg.control(it.max = 1000, K = 100) | |
| # get unique IDs | |
| id <- unique(test$ID) | |
| # create empty dataframe | |
| models <- tibble::tibble(ID = id, data = list(rep(NA, length(unique(test$ID)))), | |
| lm = list(rep(NA, length(unique(test$ID)))), | |
| seg_lm = list(rep(NA, length(unique(test$ID)))), | |
| count = rep(NA, length(unique(test$ID)))) | |
| # for loop | |
| for(i in 1:length(id)){ | |
| temp <- filter(test, ID == id[i]) | |
| # fit lm | |
| lm <- lm(truedis ~ days, temp) | |
| seg_fit <- NULL | |
| counter <- 0 | |
| # try and set up a while loop to try x number of times to fit the model | |
| while(is.null(seg_fit) == TRUE & counter < 100000){ | |
| # fit segmod | |
| # have put a try in here to return nothing if it does not fit | |
| try(seg_fit <- segmented(lm, seg.Z = ~ days, psi = list(days = 15), control = control), silent = TRUE) | |
| counter <- sum(counter, 1) | |
| } | |
| if(is.null(seg_fit) == TRUE){seg_fit <- NA} | |
| # assign values to place in list dataframe | |
| models$data[[i]] <- temp | |
| models$lm[[i]] <- lm | |
| models$seg_lm[[i]] <- seg_fit | |
| models$count[i] <- counter | |
| } | |
| # get rid of NA fits, could probably keep the linear model fits of them at some point | |
| models_sub <- filter(models, !is.na(seg_lm)) | |
| id_na <- filter(models, is.na(seg_lm)) %>% pull(ID) | |
| # get info on each fit such as AIC, BIC, df etc | |
| info2 <- models_sub %>% | |
| unnest(lm %>% map(glance)) | |
| # get parameters of each linear model (slope and intercept) | |
| params <- models_sub %>% | |
| unnest(lm %>% map(tidy)) | |
| # get confidence intervals for the linear models | |
| CI_params <- models_sub %>% | |
| unnest(lm %>% map(confint_tidy)) | |
| # bind with parameters | |
| params <- bind_cols(params, select(CI_params, -ID)) | |
| # get information such as AICc for each fit | |
| info <- models_sub %>% | |
| mutate(AICc_lm = map_dbl(lm, MuMIn::AICc), | |
| AICc_seg = map_dbl(seg_lm, MuMIn::AICc)) | |
| # get preds of each model, linear model and segmented lm #### | |
| # want to predict the segmented lm at only the min, max and breakpoint for each curve | |
| # get break point for each segmented lm | |
| breakpoint <- mutate(models_sub, breakpoint = map(seg_lm, get_seg_lm_info)) %>% | |
| unnest(breakpoint) %>% | |
| filter(., term == 'break_point') %>% | |
| select(., ID, estimate) %>% | |
| rename(days = estimate) | |
| # get max and min for each curve | |
| new_preds <- group_by(test, ID) %>% | |
| do(data.frame(days = c(min(.$days), max(.$days)))) %>% | |
| bind_rows(., breakpoint) %>% | |
| arrange(., ID, days) %>% | |
| filter(., ! ID %in% id_na) | |
| # predict | |
| preds_lm <- models_sub %>% | |
| unnest(lm %>% map(augment)) | |
| preds_seg <- models_sub %>% | |
| unnest(seg_lm %>% map(augment, newdata = new_preds)) | |
| # plot each model for each id | |
| p1 <- ggplot() + | |
| geom_point(aes(days, truedis), test) + | |
| geom_line(aes(days, .fitted), preds_lm, col = 'red') + | |
| geom_line(aes(days, .fitted), preds_seg, col = 'blue') + | |
| facet_wrap(~ID) + | |
| geom_text(aes(30, 30, label = paste('AICc = ', round(AICc_lm, 2), sep = '')), info, col = 'red', size = 2.2) + | |
| geom_text(aes(30, 37, label = paste('AICc = ', round(AICc_seg, 2), sep = '')), info, col = 'blue', size = 2.2) + | |
| ylim(-5, 40) + | |
| theme_bw() + | |
| ggtitle('Pine marten movement post release') | |
| # look at which model is best, use delta AICc score of 2 | |
| seg_mod_best <- mutate(info, diff_aic = AICc_lm - AICc_seg) %>% | |
| filter(., diff_aic >= 2) %>% | |
| pull(ID) | |
| # filter for best model | |
| lin_mod_best <- filter(models_sub, ! ID %in% seg_mod_best) | |
| seg_mod_best <- filter(models_sub, ID %in% seg_mod_best) | |
| # get slopes for lms and segmented fits | |
| seg_mod_params <- mutate(seg_mod_best, slopes = map(seg_lm, get_seg_lm_info)) %>% | |
| unnest(slopes) | |
| lin_mod_params <- lin_mod_best %>% | |
| unnest(lm %>% map(tidy)) %>% | |
| filter(., term == 'days') | |
| # compare slope estimates from ones with linear and ones with seg mod | |
| slopes <- bind_rows(select(lin_mod_params, ID, term, estimate), | |
| select(seg_mod_params, ID, term, estimate) %>% | |
| filter(term != 'break_point')) | |
| # plot #### | |
| # labels for x axis | |
| x_axis <- c('linear model\nbest slope', '1st slope\nfor segfit', '2nd slope\nfor seg fit') | |
| # make plot of slopes | |
| p2 <- ggplot(slopes, aes(term, estimate)) + | |
| MicrobioUoE::geom_pretty_boxplot(fill = 'black', col = 'black') + | |
| geom_point(size = 3, fill = 'white', shape = 21, position = position_jitter(width = 0.1)) + | |
| ylab('Distance travelled per day (km)') + | |
| xlab('') + | |
| ggtitle('Distance travelled per day\nby different pine martens') + | |
| theme_bw() + | |
| scale_x_discrete(labels = x_axis) | |
| # make plot of breakpoints | |
| p3 <- ggplot(filter(seg_mod_params, term == 'break_point'), aes(term, estimate)) + | |
| MicrobioUoE::geom_pretty_boxplot(fill = 'black', col = 'black') + | |
| geom_point(size = 3, fill = 'white', shape = 21, position = position_jitter(width = 0.1)) + | |
| xlab('') + | |
| ylab('Day when pine martens settle down') + | |
| ggtitle('Breakpoint when pine martens\nsettle down, if they do!') + | |
| theme_bw() | |
| # stack plot | |
| p4 <- p1 + {p2 + p3 + plot_layout(ncol = 1)} + plot_layout(ncol = 2, widths = c(0.75, 0.25)) | |
| # save this - this path needs changing | |
| ggsave( '~/Desktop/cat_pine_marten.pdf', p4, width = 10, height = 7) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment