padpadpadpad · February 2, 2017 17:39 · MolKems · Feb 1, 2017 · padpadpadpad · Feb 2, 2017
diff --git a/AskHelp1.R b/AskHelp1.R
 # good coding practice ####
 # 1. #hashtag your code so you know what it does
 # 2. clear workspace and load packages at the top to keep track of what you have loaded
 # 3. make sure your working directory is in the right place
 # 4. space things out in a way that makes your code readable to you
 # 5. google things you do not understand. The answers are out there, go find them
 # 6. do not get scared/angry when you get errors. It does get easier.... eventually

 # clear workspace #### Good code practice to do first
 # removes every object that ls() lists in the current workspace, so the script starts
 # from a clean slate; it does not unload packages that are already attached
 rm(list = ls())

 # set working directory - do not need to do here ####
 # (uncomment and edit the path below when reading your own files)
 #setwd("~/where/your/stuff/is")

 # load packages ####
 # if you do not have these packages - install.packages('package name')
 # dplyr: mutate(), select(), rename(), group_by(), summarise() used below
 library(dplyr)
 # tidyr: loaded for reshaping data; not used in the visible part of this script
 library(tidyr)
 # ggplot2: the plots at the end of the script
 library(ggplot2)
 # magrittr: the %>% pipe (dplyr also makes %>% available)
 library(magrittr)
 # lubridate: hour() and day() for pulling parts out of a date-time
 library(lubridate)

 # load data ####
 # df <- read.csv("data.csv", stringsAsFactors = FALSE)

 # create dummy data ####
 # a %>% means take what I have on the left and put it into the next expression
 # (as the first argument of the function on the right)
 # '.' means whatever the object on the left is

 # fix the random seed so rnorm() gives the same made-up numbers on every run,
 # which makes the tables and plots below reproducible
 set.seed(42)

 # one row every 15 minutes between the start and end timestamps, then add:
 #   birds  - random counts centred on 30 (sd 5)
 #   temp   - random temperatures centred on 25 (sd 4)
 #   random - an empty column, only here so we can practise removing it below
 df <- data.frame(time = seq(from = as.POSIXct("2016-06-24 23:00:00"),
                             to = as.POSIXct("2016-10-25 08:30:00"),
                             by = "15 mins")) %>%
   mutate(birds = rnorm(n(), mean = 30, sd = 5),
          temp = rnorm(n(), mean = 25, sd = 4),
          random = NA)

 # look at column names
 colnames(df)

 # look at first 6 values of dataframe
 head(df)

 # look at format of data
 str(df)

 # deselect the column random ####
 # a minus sign in front of a column name means "everything except this column"
 df <- select(df, -random)

 # rename a column ####
 # the syntax is new_name = old_name
 df <- rename(df, date = time)

 # check difference
 colnames(df)

 # add three helper columns taken from the timestamp - adapt the hours to your own timeframes
 #   hour        - hour of the day
 #   day         - day of the month
 #   time_of_day - 'day' for hours 8 to 17, 'night' for everything else
 # mutate builds all of them in one call, so there is no need for repeated df$... assignments
 df <- df %>%
   mutate(hour = hour(date),
          day = day(date),
          time_of_day = ifelse(hour > 7 & hour < 18, 'day', 'night'))

 # create a mean number of birds (and mean temperature) every 30 minutes
 # group_by allows us to group variables in the dataframe to then do the same action on all of those groups
 #
 # floor_date() rounds every timestamp down to the start of its 30-minute window and
 # keeps it as a proper date-time. Using cut() here would turn the time into text labels
 # that have to be parsed back into date-times afterwards, which is easy to get wrong.
 df2 <- df %>%
   group_by(day, time_of_day, time = floor_date(date, unit = '30 minutes')) %>%
   # na.rm = TRUE on both columns so a single missing reading does not turn a whole window into NA
   summarise(birds = mean(birds, na.rm = TRUE),
             temp = mean(temp, na.rm = TRUE)) %>%
   # drop any grouping that summarise leaves behind before going back to a plain data frame
   ungroup() %>%
   data.frame() %>%
   # hour of the day of each 30-minute window
   mutate(hour = hour(time))
  
 # a quick plot
 # 1. does number of birds change with temperature
 # one panel per time of day, points coloured by time of day
 ggplot(df2, aes(x = temp, y = birds, colour = time_of_day)) +
   geom_point() +
   facet_wrap(~ time_of_day)

 # no because this is my made up data!!!!

 # 2. look at change through time
 ggplot(df2, aes(x = time, y = birds, colour = time_of_day)) +
   geom_point()

 # so many points we could change our grouping so that it does one value per day!

 # Quick redo ####
 # same summary as before, but one value per calendar day and time of day
 # floor_date(date, 'day') rounds each timestamp down to midnight of its day and keeps
 # it as a date-time, so the time column can be plotted straight away
 df2 <- df %>%
   group_by(day, time_of_day, time = floor_date(date, unit = 'day')) %>%
   # na.rm = TRUE on both columns so a single missing reading does not turn a whole day into NA
   summarise(birds = mean(birds, na.rm = TRUE),
             temp = mean(temp, na.rm = TRUE)) %>%
   # drop any grouping that summarise leaves behind before going back to a plain data frame
   ungroup() %>%
   data.frame()

 # change through time again, now with far fewer points
 ggplot(df2) +
   geom_point(aes(x = time, y = birds, col = time_of_day))

 # looks a bit better!!!
	# good coding practice ####
	# 1. #hashtag your code so you know what it does
	# 2. clear workspace and load packages at the top to keep track of what you have loaded
	# 3. make sure your working directory is in the right place
	# 4. space things out in a way that makes your code readable to you
	# 5. google things you do not understand. The answers are out there, go find them
	# 6. do not get scared/angry when you get errors. It does get easier.... eventually

	# clear workspace #### Good code practice to do first
	# removes every object that ls() lists in the current workspace, so the script starts
	# from a clean slate; it does not unload packages that are already attached
	rm(list = ls())

	# set working directory - do not need to do here ####
	# (uncomment and edit the path below when reading your own files)
	#setwd("~/where/your/stuff/is")

	# load packages ####
	# if you do not have these packages - install.packages('package name')
	# dplyr: mutate(), select(), rename(), group_by(), summarise() used below
	library(dplyr)
	# tidyr: loaded for reshaping data; not used in the visible part of this script
	library(tidyr)
	# ggplot2: the plots at the end of the script
	library(ggplot2)
	# magrittr: the %>% pipe (dplyr also makes %>% available)
	library(magrittr)
	# lubridate: hour() and day() for pulling parts out of a date-time
	library(lubridate)

	# load data ####
	# df <- read.csv("data.csv", stringsAsFactors = FALSE)

	# create dummy data ####
	# a %>% means take what I have on the left and put it into the next expression
	# (as the first argument of the function on the right)
	# '.' means whatever the object on the left is

	# fix the random seed so rnorm() gives the same made-up numbers on every run,
	# which makes the tables and plots below reproducible
	set.seed(42)

	# one row every 15 minutes between the start and end timestamps, then add:
	#   birds  - random counts centred on 30 (sd 5)
	#   temp   - random temperatures centred on 25 (sd 4)
	#   random - an empty column, only here so we can practise removing it below
	df <- data.frame(time = seq(from = as.POSIXct("2016-06-24 23:00:00"),
	                            to = as.POSIXct("2016-10-25 08:30:00"),
	                            by = "15 mins")) %>%
	  mutate(birds = rnorm(n(), mean = 30, sd = 5),
	         temp = rnorm(n(), mean = 25, sd = 4),
	         random = NA)

	# look at column names
	colnames(df)

	# look at first 6 values of dataframe
	head(df)

	# look at format of data
	str(df)

	# deselect the column random ####
	# a minus sign in front of a column name means "everything except this column"
	df <- select(df, -random)

	# rename a column ####
	# the syntax is new_name = old_name
	df <- rename(df, date = time)

	# check difference
	colnames(df)

	# make a new column for hour then make a column for night or day - you can apply this to your timeframes accordingly
	# mutate allows us to make new columns without multiple assignments and without excessive use of the $ sign.
	#   hour        - hour of the day
	#   day         - day of the month
	#   time_of_day - 'night' from 18:00 up to and including the 07:00 hour, 'day' otherwise
	# the single | is the element-wise OR, so the test is applied to every row
	df <- mutate(df,
	             hour = hour(date),
	             day = day(date),
	             time_of_day = ifelse(hour >= 18 | hour <= 7, 'night', 'day'))

	# create a mean number of birds (and mean temperature) every 30 minutes
	# group_by allows us to group variables in the dataframe to then do the same action on all of those groups
	#
	# floor_date() rounds every timestamp down to the start of its 30-minute window and
	# keeps it as a proper date-time. Using cut() here would turn the time into text labels
	# that have to be parsed back into date-times afterwards, which is easy to get wrong.
	df2 <- df %>%
	  group_by(day, time_of_day, time = floor_date(date, unit = '30 minutes')) %>%
	  # na.rm = TRUE on both columns so a single missing reading does not turn a whole window into NA
	  summarise(birds = mean(birds, na.rm = TRUE),
	            temp = mean(temp, na.rm = TRUE)) %>%
	  # drop any grouping that summarise leaves behind before going back to a plain data frame
	  ungroup() %>%
	  data.frame() %>%
	  # hour of the day of each 30-minute window
	  mutate(hour = hour(time))

	# a quick plot
	# 1. does number of birds change with temperature
	# one panel per time of day, points coloured by time of day
	df2 %>%
	  ggplot(aes(x = temp, y = birds, colour = time_of_day)) +
	  geom_point() +
	  facet_wrap(~ time_of_day)

	# no because this is my made up data!!!!

	# 2. look at change through time
	df2 %>%
	  ggplot(aes(x = time, y = birds, colour = time_of_day)) +
	  geom_point()

	# so many points we could change our grouping so that it does one value per day!

	# Quick redo ####
	# same summary as before, but one value per calendar day and time of day
	# floor_date(date, 'day') rounds each timestamp down to midnight of its day and keeps
	# it as a date-time, so the time column can be plotted straight away
	df2 <- df %>%
	  group_by(day, time_of_day, time = floor_date(date, unit = 'day')) %>%
	  # na.rm = TRUE on both columns so a single missing reading does not turn a whole day into NA
	  summarise(birds = mean(birds, na.rm = TRUE),
	            temp = mean(temp, na.rm = TRUE)) %>%
	  # drop any grouping that summarise leaves behind before going back to a plain data frame
	  ungroup() %>%
	  data.frame()

	# change through time again, now with far fewer points
	ggplot(df2) +
	  geom_point(aes(x = time, y = birds, col = time_of_day))

	# looks a bit better!!!