Created
March 4, 2017 12:08
-
-
Save padpadpadpad/c2dd7828c977c583d462c0806567f884 to your computer and use it in GitHub Desktop.
Simple linear model and use of dplyr tidyr and ggplot2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# good coding practice #### | |
# 1. #hashtag your code so you know what it does | |
# 2. clear workspace and load packages at the top to keep track of what you have loaded | |
# 3. make sure your working directory is in the right place | |
# 4. space things out in a way that makes your code readable to you | |
# 5. google things you do not understand. The answers are out there, go find them | |
# 6. do not get scared/angry when you get errors. It does get easier.... eventually | |
# clear workspace #### Good code practice to do first | |
# need to install mise first! | |
# removes all currently loaded packages, console, vars and figures from R | |
mise::mise(vars = TRUE, pkgs = TRUE, figs = TRUE, console = TRUE) | |
# set working directory - do not need to do here #### | |
#setwd("~/where/your/stuff/is") | |
# load packages #### | |
library(ggplot2) | |
library(tidyr) | |
library(dplyr) | |
# create simulated data of cat frequencies | |
# Hz exhale and Hz inhale frequencies before and after food | |
# Hz is a proxy for contentedness | |
cat_name <- c('Casper', 'Fluffy', 'Jim', 'Jenny', 'Spock', 'Rocky', 'Blackie', 'Zico', 'Diego', 'Phil') | |
d <- data.frame(cat = cat_name, Hz_IN_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_IN_after = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_after = rnorm(length(cat_name), mean = 35, sd = 10)) | |
# put into long format | |
# using gather - functions from tidyr and dplyr allow us to index a dataframe without having to use $ sign or speech marks | |
d <- gather(d, key = time, value = freq, c(Hz_IN_before, Hz_EX_before, Hz_IN_after, Hz_EX_after)) | |
# need add a column for inhalation/exhalation and for before or after | |
# this will achieve the long format dataframe we want! | |
# can stack all of these new columns into one mutate function | |
d <- mutate(d, breathe = ifelse(grepl('IN', time), 'inhale', 'exhale'), | |
time = ifelse(grepl('before', time), 'before', 'after')) | |
# quick visual plot | |
# look at whether there is a difference between frequencies, time and breathe | |
ggplot(d) + | |
geom_boxplot(aes(x = time, y = freq)) + | |
facet_wrap(~ breathe) | |
# create a means dataset grouped by breathe | |
# will give us a mean for each unique combination of cat and time | |
d_mean <- group_by(d, cat, time) %>% | |
summarise(., mean = mean(freq), | |
sd = sd(freq)) %>% | |
data.frame() | |
# two tailed t-test | |
t.test(mean ~ time, d_mean, paired = TRUE) | |
# other option would be to do a linear model | |
# can look at differences between breathe and before and after | |
fit <- lm(freq ~ time * breathe, d) | |
summary(fit) | |
# (Intercept) is average freq of exhale after food as they are ordered alphabetically when the data is as.character() | |
# diagnostic plot | |
plot(fit) | |
# if you find a significant effect then make some predictions | |
# first create a new dummy data set - columns need to be the same name as in the model! | |
new_data <- data.frame(expand.grid(time = unique(d$time), breathe = unique(d$breathe), stringsAsFactors = FALSE)) | |
# create predictions | |
new_data$pred <- predict(fit, newdata = new_data) | |
# create a code for after or before for plotting - to ensure that before is plotted before after on the boxplot | |
d <- mutate(d, time_code = factor(ifelse(time == 'before', 0, 1))) | |
new_data <- mutate(new_data, time_code = factor(ifelse(time == 'before', 0, 1))) | |
# fancier boxplot with both points and boxplot #### | |
# good to see how all the data makes the distribution of points | |
# ggplot help can be found in lots of places online | |
# specifically here: | |
# http://docs.ggplot2.org/current/ | |
# http://ggplot2.org | |
ggplot(d, aes(time_code, freq, col = breathe)) + | |
geom_boxplot(aes(fill = breathe, col = breathe), outlier.shape = NA, position = position_dodge(width = 0.9)) + | |
stat_summary(aes(group = breathe), geom = 'crossbar', fatten = 0, color = 'white', fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}, position = position_dodge(width = 0.9)) + | |
geom_point(shape = 21, fill = 'white', position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.2)) + | |
ylab('Purr Frequency (Hz)') + | |
xlab("") + | |
theme_bw(base_size = 12, base_family = 'Helvetica') + | |
scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) + | |
scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) + | |
scale_x_discrete(labels=c("Before Food","After Food")) | |
# plot points with predictions instead | |
ggplot(d, aes(time_code, freq, col = breathe)) + | |
geom_point(position = position_dodge(width = 0.2), alpha = 0.5) + | |
geom_point(aes(time_code, pred, col = breathe), data = new_data, size = 3, position = position_dodge(width = 0.2)) + | |
geom_line(aes(time_code, pred, col = breathe, group = breathe), data = new_data, position = position_dodge(width = 0.2)) + | |
ylab('Purr Frequency (Hz)') + | |
xlab("") + | |
theme_bw(base_size = 12, base_family = 'Helvetica') + | |
scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) + | |
scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) + | |
scale_x_discrete(labels=c("Before Food","After Food")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment