padpadpadpad · March 4, 2017 12:08
diff --git a/ASKHelp7.R b/ASKHelp7.R
 # good coding practice ####
 # 1. #hashtag your code so you know what it does
 # 2. clear workspace and load packages at the top to keep track of what you have loaded
 # 3. make sure your working directory is in the right place
 # 4. space things out in a way that makes your code readable to you
 # 5. google things you do not understand. The answers are out there, go find them
 # 6. do not get scared/angry when you get errors. It does get easier.... eventually

 # clear workspace #### Good code practice to do first
 # need to install mise first!
 # removes all currently loaded packages, console, vars and figures from R
 mise::mise(vars = TRUE, pkgs = TRUE, figs = TRUE, console = TRUE)

 # set working directory - do not need to do here ####
 #setwd("~/where/your/stuff/is")

 # load packages ####
 library(ggplot2)
 library(tidyr)
 library(dplyr)

 # create simulated data of cat frequencies
 # Hz exhale and Hz inhale frequencies before and after food
 # Hz is a proxy for contentedness
 cat_name <- c('Casper', 'Fluffy', 'Jim', 'Jenny', 'Spock', 'Rocky', 'Blackie', 'Zico', 'Diego', 'Phil')
 d <- data.frame(cat = cat_name, Hz_IN_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_IN_after = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_after = rnorm(length(cat_name), mean = 35, sd = 10))

 # put into long format
 # using gather - functions from tidyr and dplyr allow us to index a dataframe without having to use $ sign or speech marks
 d <- gather(d, key = time, value = freq, c(Hz_IN_before, Hz_EX_before, Hz_IN_after, Hz_EX_after))

 # need add a column for inhalation/exhalation and for before or after
 # this will achieve the long format dataframe we want!
 # can stack all of these new columns into one mutate function
 d <- mutate(d, breathe = ifelse(grepl('IN', time), 'inhale', 'exhale'),
            time = ifelse(grepl('before', time), 'before', 'after'))

 # quick visual plot
 # look at whether there is a difference between frequencies, time and breathe
 ggplot(d) +
  geom_boxplot(aes(x = time, y = freq)) +
  facet_wrap(~ breathe)

 # create a means dataset grouped by breathe
 # will give us a mean for each unique combination of cat and time
 d_mean <- group_by(d, cat, time) %>%
  summarise(., mean = mean(freq),
            sd = sd(freq)) %>%
  data.frame()

 # two tailed t-test
 t.test(mean ~ time, d_mean, paired = TRUE)

 # other option would be to do a linear model
 # can look at differences between breathe and before and after
 fit <- lm(freq ~ time * breathe, d)
 summary(fit)

 # (Intercept) is average freq of exhale after food as they are ordered alphabetically when the data is as.character()

 # diagnostic plot
 plot(fit)

 # if you find a significant effect then make some predictions
 # first create a new dummy data set - columns need to be the same name as in the model!
 new_data <- data.frame(expand.grid(time = unique(d$time), breathe = unique(d$breathe), stringsAsFactors = FALSE))

 # create predictions
 new_data$pred <- predict(fit, newdata = new_data)

 # create a code for after or before for plotting - to ensure that before is plotted before after on the boxplot
 d <- mutate(d, time_code = factor(ifelse(time == 'before', 0, 1)))
 new_data <- mutate(new_data, time_code = factor(ifelse(time == 'before', 0, 1)))

 # fancier boxplot with both points and boxplot ####
 # good to see how all the data makes the distribution of points
 # ggplot help can be found in lots of places online
 # specifically here:
 # http://docs.ggplot2.org/current/
 # http://ggplot2.org
 ggplot(d, aes(time_code, freq, col = breathe)) +
  geom_boxplot(aes(fill = breathe, col = breathe), outlier.shape = NA, position = position_dodge(width = 0.9)) +
  stat_summary(aes(group = breathe), geom = 'crossbar', fatten = 0, color = 'white', fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}, position = position_dodge(width = 0.9)) +
  geom_point(shape = 21, fill = 'white', position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.2)) +
  ylab('Purr Frequency (Hz)') +
  xlab("") +
  theme_bw(base_size = 12, base_family = 'Helvetica') +
  scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
  scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
  scale_x_discrete(labels=c("Before Food","After Food"))

 # plot points with predictions instead
 ggplot(d, aes(time_code, freq, col = breathe)) +
  geom_point(position = position_dodge(width = 0.2), alpha = 0.5) +
  geom_point(aes(time_code, pred, col = breathe), data = new_data, size = 3, position = position_dodge(width = 0.2)) +
  geom_line(aes(time_code, pred, col = breathe, group = breathe), data = new_data, position = position_dodge(width = 0.2)) +
  ylab('Purr Frequency (Hz)') +
  xlab("") +
  theme_bw(base_size = 12, base_family = 'Helvetica') +
  scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
  scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
  scale_x_discrete(labels=c("Before Food","After Food"))
	# good coding practice ####
	# 1. #hashtag your code so you know what it does
	# 2. clear workspace and load packages at the top to keep track of what you have loaded
	# 3. make sure your working directory is in the right place
	# 4. space things out in a way that makes your code readable to you
	# 5. google things you do not understand. The answers are out there, go find them
	# 6. do not get scared/angry when you get errors. It does get easier.... eventually

	# clear workspace #### Good code practice to do first
	# need to install mise first!
	# removes all currently loaded packages, console, vars and figures from R
	mise::mise(vars = TRUE, pkgs = TRUE, figs = TRUE, console = TRUE)

	# set working directory - do not need to do here ####
	#setwd("~/where/your/stuff/is")

	# load packages ####
	library(ggplot2)
	library(tidyr)
	library(dplyr)

	# create simulated data of cat frequencies
	# Hz exhale and Hz inhale frequencies before and after food
	# Hz is a proxy for contentedness
	cat_name <- c('Casper', 'Fluffy', 'Jim', 'Jenny', 'Spock', 'Rocky', 'Blackie', 'Zico', 'Diego', 'Phil')
	d <- data.frame(cat = cat_name, Hz_IN_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_before = rnorm(length(cat_name), mean = 35, sd = 10), Hz_IN_after = rnorm(length(cat_name), mean = 35, sd = 10), Hz_EX_after = rnorm(length(cat_name), mean = 35, sd = 10))

	# put into long format
	# using gather - functions from tidyr and dplyr allow us to index a dataframe without having to use $ sign or speech marks
	d <- gather(d, key = time, value = freq, c(Hz_IN_before, Hz_EX_before, Hz_IN_after, Hz_EX_after))

	# need add a column for inhalation/exhalation and for before or after
	# this will achieve the long format dataframe we want!
	# can stack all of these new columns into one mutate function
	d <- mutate(d, breathe = ifelse(grepl('IN', time), 'inhale', 'exhale'),
	time = ifelse(grepl('before', time), 'before', 'after'))

	# quick visual plot
	# look at whether there is a difference between frequencies, time and breathe
	ggplot(d) +
	geom_boxplot(aes(x = time, y = freq)) +
	facet_wrap(~ breathe)

	# create a means dataset grouped by breathe
	# will give us a mean for each unique combination of cat and time
	d_mean <- group_by(d, cat, time) %>%
	summarise(., mean = mean(freq),
	sd = sd(freq)) %>%
	data.frame()

	# two tailed t-test
	t.test(mean ~ time, d_mean, paired = TRUE)

	# other option would be to do a linear model
	# can look at differences between breathe and before and after
	fit <- lm(freq ~ time * breathe, d)
	summary(fit)

	# (Intercept) is average freq of exhale after food as they are ordered alphabetically when the data is as.character()

	# diagnostic plot
	plot(fit)

	# if you find a significant effect then make some predictions
	# first create a new dummy data set - columns need to be the same name as in the model!
	new_data <- data.frame(expand.grid(time = unique(d$time), breathe = unique(d$breathe), stringsAsFactors = FALSE))

	# create predictions
	new_data$pred <- predict(fit, newdata = new_data)

	# create a code for after or before for plotting - to ensure that before is plotted before after on the boxplot
	d <- mutate(d, time_code = factor(ifelse(time == 'before', 0, 1)))
	new_data <- mutate(new_data, time_code = factor(ifelse(time == 'before', 0, 1)))

	# fancier boxplot with both points and boxplot ####
	# good to see how all the data makes the distribution of points
	# ggplot help can be found in lots of places online
	# specifically here:
	# http://docs.ggplot2.org/current/
	# http://ggplot2.org
	ggplot(d, aes(time_code, freq, col = breathe)) +
	geom_boxplot(aes(fill = breathe, col = breathe), outlier.shape = NA, position = position_dodge(width = 0.9)) +
	stat_summary(aes(group = breathe), geom = 'crossbar', fatten = 0, color = 'white', fun.data = function(x){ return(c(y=median(x), ymin=median(x), ymax=median(x)))}, position = position_dodge(width = 0.9)) +
	geom_point(shape = 21, fill = 'white', position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.2)) +
	ylab('Purr Frequency (Hz)') +
	xlab("") +
	theme_bw(base_size = 12, base_family = 'Helvetica') +
	scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
	scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
	scale_x_discrete(labels=c("Before Food","After Food"))

	# plot points with predictions instead
	ggplot(d, aes(time_code, freq, col = breathe)) +
	geom_point(position = position_dodge(width = 0.2), alpha = 0.5) +
	geom_point(aes(time_code, pred, col = breathe), data = new_data, size = 3, position = position_dodge(width = 0.2)) +
	geom_line(aes(time_code, pred, col = breathe, group = breathe), data = new_data, position = position_dodge(width = 0.2)) +
	ylab('Purr Frequency (Hz)') +
	xlab("") +
	theme_bw(base_size = 12, base_family = 'Helvetica') +
	scale_color_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
	scale_fill_manual('', values = c('blue', 'red'), labels = c('Exhale', 'Inhale')) +
	scale_x_discrete(labels=c("Before Food","After Food"))