Skip to content

Instantly share code, notes, and snippets.

@padpadpadpad
Created March 4, 2017 12:08
Show Gist options
  • Save padpadpadpad/c2dd7828c977c583d462c0806567f884 to your computer and use it in GitHub Desktop.
Save padpadpadpad/c2dd7828c977c583d462c0806567f884 to your computer and use it in GitHub Desktop.
Simple linear model and use of dplyr tidyr and ggplot2
# good coding practice ####
# 1. #hashtag your code so you know what it does
# 2. clear workspace and load packages at the top to keep track of what you have loaded
# 3. make sure your working directory is in the right place
# 4. space things out in a way that makes your code readable to you
# 5. google things you do not understand. The answers are out there, go find them
# 6. do not get scared/angry when you get errors. It does get easier.... eventually
# clear workspace #### Good code practice to do first
# need to install mise first!
# removes all currently loaded packages, console, vars and figures from R
mise::mise(vars = TRUE, pkgs = TRUE, figs = TRUE, console = TRUE)
# set working directory - do not need to do here ####
#setwd("~/where/your/stuff/is")
# load packages ####
library(ggplot2)
library(tidyr)
library(dplyr)
# create simulated data of cat frequencies
# Hz exhale and Hz inhale frequencies before and after food
# Hz is a proxy for contentedness
cat_name <- c('Casper', 'Fluffy', 'Jim', 'Jenny', 'Spock', 'Rocky', 'Blackie', 'Zico', 'Diego', 'Phil')
d <- data.frame(cat = cat_name, Hz_IN_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_IN_after = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_after = rnorm(length(cat_name), mean = 35, sd = 10))
# put into long format
# using gather - functions from tidyr and dplyr allow us to index a dataframe without having to use $ sign or speech marks
d <- gather(d, key = time, value = freq, c(Hz_IN_before, Hz_EX_before, Hz_IN_after, Hz_EX_after))
# need add a column for inhalation/exhalation and for before or after
# this will achieve the long format dataframe we want!
# can stack all of these new columns into one mutate function
d <- mutate(d, breathe = ifelse(grepl('IN', time), 'inhale', 'exhale'),
time = ifelse(grepl('before', time), 'before', 'after'))
# quick visual plot
# look at whether there is a difference between frequencies, time and breathe
ggplot(d) +
geom_boxplot(aes(x = time, y = freq)) +
facet_wrap(~ breathe)
# create a means dataset grouped by breathe
# will give us a mean for each unique combination of cat and time
d_mean <- group_by(d, cat, time) %>%
summarise(., mean = mean(freq),
sd = sd(freq)) %>%
data.frame()
# two tailed t-test
t.test(mean ~ time, d_mean, paired = TRUE)
# other option would be to do a linear model
# can look at differences between breathe and before and after
fit <- lm(freq ~ time * breathe, d)
summary(fit)
# (Intercept) is average freq of exhale after food as they are ordered alphabetically when the data is as.character()
# diagnostic plot
plot(fit)
# if you find a significant effect then make some predictions
# first create a new dummy data set - columns need to be the same name as in the model!
new_data <- data.frame(expand.grid(time = unique(d$time), breathe = unique(d$breathe), stringsAsFactors = FALSE))
# create predictions
new_data$pred <- predict(fit, newdata = new_data)
# create a code for after or before for plotting - to ensure that before is plotted before after on the boxplot
d <- mutate(d, time_code = factor(ifelse(time == 'before', 0, 1)))
new_data <- mutate(new_data, time_code = factor(ifelse(time == 'before', 0, 1)))
# fancier boxplot with both points and boxplot ####
# good to see how all the data makes the distribution of points
# ggplot help can be found in lots of places online
# specifically here:
# http://docs.ggplot2.org/current/
# http://ggplot2.org
ggplot(d, aes(time_code, freq, col = breathe)) +
geom_boxplot(aes(fill = breathe, col = breathe), outlier.shape = NA, position = position_dodge(width = 0.9)) +
stat_summary(aes(group = breathe), geom = 'crossbar', fatten = 0, color = 'white', fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}, position = position_dodge(width = 0.9)) +
geom_point(shape = 21, fill = 'white', position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.2)) +
ylab('Purr Frequency (Hz)') +
xlab("") +
theme_bw(base_size = 12, base_family = 'Helvetica') +
scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
scale_x_discrete(labels=c("Before Food","After Food"))
# plot points with predictions instead
ggplot(d, aes(time_code, freq, col = breathe)) +
geom_point(position = position_dodge(width = 0.2), alpha = 0.5) +
geom_point(aes(time_code, pred, col = breathe), data = new_data, size = 3, position = position_dodge(width = 0.2)) +
geom_line(aes(time_code, pred, col = breathe, group = breathe), data = new_data, position = position_dodge(width = 0.2)) +
ylab('Purr Frequency (Hz)') +
xlab("") +
theme_bw(base_size = 12, base_family = 'Helvetica') +
scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
scale_x_discrete(labels=c("Before Food","After Food"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment