n8thangreen · December 15, 2016 09:48
diff --git a/R introduction - Lecture 1.R b/R introduction - Lecture 1.R
 ## ----setup, include=FALSE------------------------------------------------
 # Set the knitr chunk option `echo` to TRUE as the default for all chunks
 knitr::opts_chunk$set(echo = TRUE)

 ## ---- echo=T, eval=T-----------------------------------------------------
 # R works as a calculator: at the console the value of a bare
 # expression is displayed directly
 2 + 2

 ## ---- echo=T, eval=T-----------------------------------------------------
 # Store the value of an expression in an object called `a`
 a <- 2 + 2

 ## ---- echo=T,eval=T------------------------------------------------------
 # Objects can hold decimal numbers, text, or the value of a calculation
 b <- 2.5
 hobbit_home <- "hobbiton"
 c <- 3 * 5

 # Objects can be used in further calculations
 example_sum <- b + c

 # Typing the name of an object on its own line displays its value
 hobbit_home
 example_sum

 ## ---- echo=T,eval=T------------------------------------------------------
 # Assigning to an existing name replaces the old value ...
 var <- 3 * 5
 var

 var <- 20
 var

 # ... and the new value may even be of a different type
 var <- "wheredidmynumbersgo"
 var

 ## ---- echo=T, eval=T-----------------------------------------------------
 # Functions accept either an object or a literal value as their argument
 a <- 16
 result1 <- sqrt(a)
 result2 <- sqrt(25)

 result1
 result2

 ## ---- echo=T,eval=T------------------------------------------------------
 # The result is not assigned to an object, so R prints it straight away:
 # ten random draws from a normal distribution with mean 5 and sd 2
 rnorm(n = 10, mean = 5, sd = 2)

 ## ---- echo=T,eval=T------------------------------------------------------
 # Everything after a `#` is a comment and is ignored by R
 # The code below stores two numbers and adds them together
 a <- 4
 b <- 5 # A comment may also follow code on the same line
 a + b

 # The assignment below is commented out, so it never runs
 # c <- 6*6

 ## ----echo=T,eval=T-------------------------------------------------------
 # print() displays the value of an object explicitly
 a <- "letshavealookthen"
 print(a)

 ## ----echo=T,eval=T-------------------------------------------------------
 # A function call may be spread over several lines
 rnorm(
   10,
   2,
   1
 )

 ## ---- echo=T,eval=T------------------------------------------------------
 # Logical (TRUE/FALSE) values
 x <- TRUE
 y <- FALSE

 ## ---- echo=T,eval=T------------------------------------------------------
 # c() combines values into a vector; these whole numbers are stored
 # as numerics
 systolic_bp <- c(140, 120, 100, 90, 110)
 print(systolic_bp)

 # A vector of decimal numbers
 blood_conc <- c(0.1, 0.001, 0.0005, 0.03, 0.6)
 print(blood_conc)

 # The colon operator builds a vector of consecutive integers
 person_no <- 1:5
 print(person_no)

 # A vector of character strings
 person_name <- c("frodo", "samwise", "meriadoc", "peregrin", "aragorn")
 print(person_name)

 ## ----echo=T,eval=T-------------------------------------------------------
 # cbind() places the two vectors next to each other as columns
 cbind(systolic_bp, blood_conc)

 # rbind() stacks the two vectors on top of each other as rows
 rbind(systolic_bp, blood_conc)


 ## ---- echo=T,eval=T------------------------------------------------------
 # Square brackets pick out single elements: here the first and the third
 print(systolic_bp[1])
 print(systolic_bp[3])

 # Overwrite the fourth element of the person_name vector
 person_name[4] <- "fool of a took"

 ## ---- echo=T,eval=T------------------------------------------------------
 # Print elements 1 to 3
 print(blood_conc[1:3])
 # Print elements 1, 3 and 5
 print(blood_conc[c(1, 3, 5)])

 ## ---- echo=T,eval=T------------------------------------------------------
 # A negative index drops elements: print all but the second element
 # of blood_conc
 print(blood_conc[-2])

 # Print all but the first and fifth element of blood_conc
 print(blood_conc[-c(1, 5)])

 ## ---- echo=T,eval=T------------------------------------------------------
 # Bind vectors together as the columns of a data frame
 age <- c(50, 38, 36, 28, 87)
 fellowship <- data.frame(
   person_no,
   person_name,
   age,
   systolic_bp,
   blood_conc
 )
 print(fellowship)

 ## ---- echo=T,eval=T------------------------------------------------------
 # A vector holds a single type: mixing numbers with text turns every
 # element into a character string
 wrong <- c(1, 1.325, 21, "sneaky")
 print(wrong)

 ## ---- echo=T, eval=T-----------------------------------------------------
 # Made-up example data
 age <- c(25, 44, 17, 30, 61)
 height <- c(170, 180, 150, 145, 188)
 person_name <- c("brock", "surge", "misty", "erika", "koga")

 # Categorical variables are stored as factors. `levels` lists every
 # allowed category, including ones that do not occur in the data
 gender <- factor(c("M", "M", "F", "F", "M"), levels = c("M", "F", "Other"))
 blood_type <- factor(
   c("A", "B", "A", "AB", "A"),
   levels = c("A", "B", "AB", "O")
 )

 # `stringsAsFactors = FALSE` keeps person_name as plain character
 # strings rather than letting data.frame() convert it to a factor
 # (conversion was the default before R 4.0)
 people <- data.frame(
   person_no, person_name, age, height, gender, blood_type,
   stringsAsFactors = FALSE
 )
 print(people)
 print(levels(blood_type))

 ## ---- echo=T,eval=T------------------------------------------------------
 # A data frame is indexed as [row, column]: age of the first person
 print(people[1, "age"])

 # Ranges and vectors of names select several rows or columns at once
 print(people[1:3, "blood_type"])
 print(people[4, c("gender", "blood_type")])

 # Leaving the row or the column position empty selects all of them
 # All data for the third person
 print(people[3, ])

 # All heights
 print(people[, "height"])

 # colnames() renames the columns after the data frame has been created
 colnames(people) <- c(
   "Patient_ID", "Name", "Age", "Height", "Gender", "Blood_Type"
 )
 print(people)

 ## ---- echo=T,eval=T------------------------------------------------------
 # `$` extracts a single column by name: the Patient_ID column
 print(people$Patient_ID)

 # The extracted column can itself be indexed: third value of Gender
 print(people$Gender[3])

 ## ---- echo=T,eval=T------------------------------------------------------
 # Arithmetic on vectors works element by element: each age is
 # multiplied by itself
 age_squared <- age * age
 print(age_squared)

 # A single number is applied to every element of a vector,
 # e.g. to convert millimetres to metres
 heights_mm <- c(1770, 1600, 1890, 1720, 1660)
 heights_m <- heights_mm * 0.001
 print(heights_m)

 # Two vectors can be combined element by element, which is handy
 # for calculations that involve several variables
 # LDL cholesterol in mmol/L
 LDLchol <- c(0.5, 0.7, 0.55, 1.8, 1.6)

 # HDL cholesterol in mmol/L
 HDLchol <- c(3.30, 3.98, 3.19, 3.18, 3.44)

 total_chol <- LDLchol + HDLchol
 print(total_chol)

 ## ---- echo=T,eval=T------------------------------------------------------
 # Import data from csv files. A path is either the full file path or a
 # path relative to the current working directory, so "data/cd4.csv" is
 # looked up inside the "data" folder of the working directory.
 cd4_data <- read.csv(file = "data/cd4.csv", header = TRUE)
 melanoma_data <- read.csv(file = "data/melanoma.csv", header = TRUE)

 # Hint: you can check the working directory with the console using:
 getwd()

 # You can also change the working directory using setwd(). The path
 # below is machine-specific, and setwd() stops the script with an error
 # when the folder does not exist, so the directory is only changed when
 # the folder is actually present.
 data_dir <- "~/Dropbox/Teaching/R practicals/Intro Material/data"
 if (dir.exists(data_dir)) {
   setwd(data_dir)
 } else {
   message("Skipping setwd(): folder not found: ", data_dir)
 }

 # You can also see what files are in your current directory using:
 list.files()

 ## ---- echo=T,eval=T------------------------------------------------------
 # Open a window to view the data. View() opens a viewer window, which
 # may not be available when the script is run non-interactively, so
 # the call is only made in an interactive session.
 if (interactive()) {
   View(cd4_data)
 }

 # View the dimensions of the data: number of rows, then number of columns
 nrow(cd4_data)
 ncol(cd4_data)

 # Have a look at the column variable names of the data
 colnames(cd4_data)

 # Check the data type of a specific column, here the one named "baseline"
 class(cd4_data[, "baseline"])

 ## ---- echo=T-------------------------------------------------------------
 # Save the fellowship data frame as a csv file; the relative file name
 # is resolved against the current working directory
 write.csv(x = fellowship, file = "fellowship_2.csv")
	## ----setup, include=FALSE------------------------------------------------
	# Set the knitr chunk option `echo` to TRUE as the default for all chunks
	knitr::opts_chunk$set(echo = TRUE)

	## ---- echo=T, eval=T-----------------------------------------------------
	# R works as a calculator: at the console the value of a bare
	# expression is displayed directly
	2 + 2

	## ---- echo=T, eval=T-----------------------------------------------------
	# Store the value of an expression in an object called `a`
	a <- 2 + 2

	## ---- echo=T,eval=T------------------------------------------------------
	# Objects can hold decimal numbers, text, or the value of a calculation
	b <- 2.5
	hobbit_home <- "hobbiton"
	c <- 3 * 5

	# Objects can be used in further calculations
	example_sum <- b + c

	# Typing the name of an object on its own line displays its value
	hobbit_home
	example_sum

	## ---- echo=T,eval=T------------------------------------------------------
	# Assigning to an existing name replaces the old value ...
	var <- 3 * 5
	var

	var <- 20
	var

	# ... and the new value may even be of a different type
	var <- "wheredidmynumbersgo"
	var

	## ---- echo=T, eval=T-----------------------------------------------------
	# Functions accept either an object or a literal value as their argument
	a <- 16
	result1 <- sqrt(a)
	result2 <- sqrt(25)

	result1
	result2

	## ---- echo=T,eval=T------------------------------------------------------
	# The result is not assigned to an object, so R prints it straight away:
	# ten random draws from a normal distribution with mean 5 and sd 2
	rnorm(n = 10, mean = 5, sd = 2)

	## ---- echo=T,eval=T------------------------------------------------------
	# Everything after a `#` is a comment and is ignored by R
	# The code below stores two numbers and adds them together
	a <- 4
	b <- 5 # A comment may also follow code on the same line
	a + b

	# The assignment below is commented out, so it never runs
	# c <- 6*6

	## ----echo=T,eval=T-------------------------------------------------------
	# print() displays the value of an object explicitly
	a <- "letshavealookthen"
	print(a)

	## ----echo=T,eval=T-------------------------------------------------------
	# A function call may be spread over several lines
	rnorm(
		10,
		2,
		1
	)

	## ---- echo=T,eval=T------------------------------------------------------
	# Logical (TRUE/FALSE) values
	x <- TRUE
	y <- FALSE

	## ---- echo=T,eval=T------------------------------------------------------
	# c() combines values into a vector; these whole numbers are stored
	# as numerics
	systolic_bp <- c(140, 120, 100, 90, 110)
	print(systolic_bp)

	# A vector of decimal numbers
	blood_conc <- c(0.1, 0.001, 0.0005, 0.03, 0.6)
	print(blood_conc)

	# The colon operator builds a vector of consecutive integers
	person_no <- 1:5
	print(person_no)

	# A vector of character strings
	person_name <- c("frodo", "samwise", "meriadoc", "peregrin", "aragorn")
	print(person_name)

	## ----echo=T,eval=T-------------------------------------------------------
	# cbind() places the two vectors next to each other as columns
	cbind(systolic_bp, blood_conc)

	# rbind() stacks the two vectors on top of each other as rows
	rbind(systolic_bp, blood_conc)


	## ---- echo=T,eval=T------------------------------------------------------
	# Square brackets pick out single elements: here the first and the third
	print(systolic_bp[1])
	print(systolic_bp[3])

	# Overwrite the fourth element of the person_name vector
	person_name[4] <- "fool of a took"

	## ---- echo=T,eval=T------------------------------------------------------
	# Print elements 1 to 3
	print(blood_conc[1:3])
	# Print elements 1, 3 and 5
	print(blood_conc[c(1, 3, 5)])

	## ---- echo=T,eval=T------------------------------------------------------
	# A negative index drops elements: print all but the second element
	# of blood_conc
	print(blood_conc[-2])

	# Print all but the first and fifth element of blood_conc
	print(blood_conc[-c(1, 5)])

	## ---- echo=T,eval=T------------------------------------------------------
	# Bind vectors together as the columns of a data frame
	age <- c(50, 38, 36, 28, 87)
	fellowship <- data.frame(
		person_no,
		person_name,
		age,
		systolic_bp,
		blood_conc
	)
	print(fellowship)

	## ---- echo=T,eval=T------------------------------------------------------
	# A vector holds a single type: mixing numbers with text turns every
	# element into a character string
	wrong <- c(1, 1.325, 21, "sneaky")
	print(wrong)

	## ---- echo=T, eval=T-----------------------------------------------------
	# Made-up example data
	age <- c(25, 44, 17, 30, 61)
	height <- c(170, 180, 150, 145, 188)
	person_name <- c("brock", "surge", "misty", "erika", "koga")

	# Categorical variables are stored as factors. `levels` lists every
	# allowed category, including ones that do not occur in the data
	gender <- factor(c("M", "M", "F", "F", "M"), levels = c("M", "F", "Other"))
	blood_type <- factor(
		c("A", "B", "A", "AB", "A"),
		levels = c("A", "B", "AB", "O")
	)

	# `stringsAsFactors = FALSE` keeps person_name as plain character
	# strings rather than letting data.frame() convert it to a factor
	# (conversion was the default before R 4.0)
	people <- data.frame(
		person_no, person_name, age, height, gender, blood_type,
		stringsAsFactors = FALSE
	)
	print(people)
	print(levels(blood_type))

	## ---- echo=T,eval=T------------------------------------------------------
	# A data frame is indexed as [row, column]: age of the first person
	print(people[1, "age"])

	# Ranges and vectors of names select several rows or columns at once
	print(people[1:3, "blood_type"])
	print(people[4, c("gender", "blood_type")])

	# Leaving the row or the column position empty selects all of them
	# All data for the third person
	print(people[3, ])

	# All heights
	print(people[, "height"])

	# colnames() renames the columns after the data frame has been created
	colnames(people) <- c(
		"Patient_ID", "Name", "Age", "Height", "Gender", "Blood_Type"
	)
	print(people)

	## ---- echo=T,eval=T------------------------------------------------------
	# `$` extracts a single column by name: the Patient_ID column
	print(people$Patient_ID)

	# The extracted column can itself be indexed: third value of Gender
	print(people$Gender[3])

	## ---- echo=T,eval=T------------------------------------------------------
	# Arithmetic on vectors works element by element: each age is
	# multiplied by itself
	age_squared <- age * age
	print(age_squared)

	# A single number is applied to every element of a vector,
	# e.g. to convert millimetres to metres
	heights_mm <- c(1770, 1600, 1890, 1720, 1660)
	heights_m <- heights_mm * 0.001
	print(heights_m)

	# Two vectors can be combined element by element, which is handy
	# for calculations that involve several variables
	# LDL cholesterol in mmol/L
	LDLchol <- c(0.5, 0.7, 0.55, 1.8, 1.6)

	# HDL cholesterol in mmol/L
	HDLchol <- c(3.30, 3.98, 3.19, 3.18, 3.44)

	total_chol <- LDLchol + HDLchol
	print(total_chol)

	## ---- echo=T,eval=T------------------------------------------------------
	# Import data from csv files. A path is either the full file path or a
	# path relative to the current working directory, so "data/cd4.csv" is
	# looked up inside the "data" folder of the working directory.
	cd4_data <- read.csv(file = "data/cd4.csv", header = TRUE)
	melanoma_data <- read.csv(file = "data/melanoma.csv", header = TRUE)

	# Hint: you can check the working directory with the console using:
	getwd()

	# You can also change the working directory using setwd(). The path
	# below is machine-specific, and setwd() stops the script with an error
	# when the folder does not exist, so the directory is only changed when
	# the folder is actually present.
	data_dir <- "~/Dropbox/Teaching/R practicals/Intro Material/data"
	if (dir.exists(data_dir)) {
		setwd(data_dir)
	} else {
		message("Skipping setwd(): folder not found: ", data_dir)
	}

	# You can also see what files are in your current directory using:
	list.files()

	## ---- echo=T,eval=T------------------------------------------------------
	# Open a window to view the data. View() opens a viewer window, which
	# may not be available when the script is run non-interactively, so
	# the call is only made in an interactive session.
	if (interactive()) {
		View(cd4_data)
	}

	# View the dimensions of the data: number of rows, then number of columns
	nrow(cd4_data)
	ncol(cd4_data)

	# Have a look at the column variable names of the data
	colnames(cd4_data)

	# Check the data type of a specific column, here the one named "baseline"
	class(cd4_data[, "baseline"])

	## ---- echo=T-------------------------------------------------------------
	# Save the fellowship data frame as a csv file; the relative file name
	# is resolved against the current working directory
	write.csv(x = fellowship, file = "fellowship_2.csv")