Skip to content

Instantly share code, notes, and snippets.

@n8thangreen
Last active December 15, 2016 09:48
Show Gist options
  • Save n8thangreen/03a70529b55543da4b3498bba3ee2e31 to your computer and use it in GitHub Desktop.
Save n8thangreen/03a70529b55543da4b3498bba3ee2e31 to your computer and use it in GitHub Desktop.
Imperial College London MSc Public Health practical
## ----setup, include=FALSE------------------------------------------------
# Set the knitr chunk option `echo` to TRUE through knitr's chunk-option
# settings, so it applies to chunks that do not override it.
knitr::opts_chunk$set(echo = TRUE)
## ---- echo=T, eval=T-----------------------------------------------------
# R works as a calculator: an expression written on its own is evaluated.
2 + 2
## ---- echo=T, eval=T-----------------------------------------------------
# Store the result in an object called `a` instead of just evaluating it.
a <- 2 + 2
## ---- echo=T,eval=T------------------------------------------------------
# Objects can hold numbers or text and can be reused in new expressions.
# (Naming an object `c` does not stop the c() function from working:
# R looks for a function whenever a name is followed by parentheses.)
hobbit_home <- "hobbiton"
b <- 2.5
c <- 3 * 5
example_sum <- b + c
# An object's name on its own evaluates to its current value.
hobbit_home
example_sum
## ---- echo=T,eval=T------------------------------------------------------
# Assigning to an existing name replaces what it held before,
# even when the new value has a different type.
var <- 3 * 5
var
var <- 20
var
var <- "wheredidmynumbersgo"
var
## ---- echo=T, eval=T-----------------------------------------------------
# Functions take their arguments in parentheses; an argument can be
# an existing object or a literal value.
a <- 16
result2 <- sqrt(25)
result1 <- sqrt(a)
result1
result2
## ---- echo=T,eval=T------------------------------------------------------
# The result is not assigned to an object name here,
# so R prints it directly instead of saving it.
rnorm(n = 10, mean = 5, sd = 2)
## ---- echo=T,eval=T------------------------------------------------------
# Anything after a hash sign is a comment and is ignored by R
# The code below creates two numbers and adds them together
a <- 4
b <- 5 # A comment may also begin partway through a line
a + b
# The next line is commented out, so it is never run
# c <- 6*6
## ----echo=T,eval=T-------------------------------------------------------
# print() displays the value held by an object
a <- "letshavealookthen"
print(a)
## ----echo=T,eval=T-------------------------------------------------------
# A function call may be spread over several lines; R keeps reading
# until it reaches the closing parenthesis.
rnorm(
  10,
  2,
  1
)
## ---- echo=T,eval=T------------------------------------------------------
# Logical values are written TRUE and FALSE
x <- TRUE
y <- FALSE
## ---- echo=T,eval=T------------------------------------------------------
# A vector of whole numbers (R stores these as numeric values
# unless they are written with an `L` suffix)
systolic_bp <- c(140, 120, 100, 90, 110)
print(systolic_bp)
# A vector of decimal numbers
blood_conc <- c(0.1, 0.001, 0.0005, 0.03, 0.6)
print(blood_conc)
# The colon operator builds a vector of consecutive integers
person_no <- 1:5
print(person_no)
# A vector of character strings
person_name <- c("frodo", "samwise", "meriadoc", "peregrin", "aragorn")
print(person_name)
## ----echo=T,eval=T-------------------------------------------------------
# cbind places the two vectors next to each other as columns
cbind(systolic_bp, blood_conc)
# rbind stacks the two vectors on top of each other as rows
rbind(systolic_bp, blood_conc)
## ---- echo=T,eval=T------------------------------------------------------
# Square brackets pick out single elements: here the first and the third
print(systolic_bp[1])
print(systolic_bp[3])
# Overwrite the fourth element of the character vector person_name
person_name[4] <- "fool of a took"
## ---- echo=T,eval=T------------------------------------------------------
# A range of positions: elements 1 to 3
print(blood_conc[1:3])
# A vector of positions: elements 1, 3 and 5
print(blood_conc[c(1, 3, 5)])
## ---- echo=T,eval=T------------------------------------------------------
# A negative index drops elements: everything except the second
# element of blood_conc
print(blood_conc[-2])
# Everything except the first and last element of blood_conc
print(blood_conc[-c(1, 5)])
## ---- echo=T,eval=T------------------------------------------------------
# Combine vectors of equal length into a data frame, one column each;
# the column names are taken from the object names.
age <- c(50, 38, 36, 28, 87)
fellowship <- data.frame(person_no, person_name, age, systolic_bp, blood_conc)
print(fellowship)
## ---- echo=T,eval=T------------------------------------------------------
# A vector holds a single type: mixing numbers with text
# turns every element into a character string.
wrong <- c(1, 1.325, 21, "sneaky")
print(wrong)
## ---- echo=T, eval=T-----------------------------------------------------
# Made-up example data for five people
person_name <- c("brock", "surge", "misty", "erika", "koga")
age <- c(25, 44, 17, 30, 61)
height <- c(170, 180, 150, 145, 188)
# Gender and blood type are categorical, so they are stored as factors.
# `levels` lists every allowed category, including ones that do not
# occur in the data (here "Other" and "O").
blood_type <- factor(
  c("A", "B", "A", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
gender <- factor(
  c("M", "M", "F", "F", "M"),
  levels = c("M", "F", "Other")
)
# `stringsAsFactors = FALSE` keeps person_name as plain text rather than
# letting data.frame() turn it into a factor. (This is the default from
# R 4.0.0 onwards, but is spelled out so older versions behave the same.)
# person_no is the 1:5 ID vector created earlier in this script.
people <- data.frame(
  person_no, person_name, age, height, gender, blood_type,
  stringsAsFactors = FALSE
)
print(people)
print(levels(blood_type))
## ---- echo=T,eval=T------------------------------------------------------
# Data frames are indexed as [row, column], separated by a comma.
# Age of the first person:
print(people[1, "age"])
# Vector slicing works here too, to pull out subsets of the data
print(people[1:3, "blood_type"])
print(people[4, c("gender", "blood_type")])
# Leaving the row or the column part empty selects all rows / all columns
# All data for the third person
print(people[3, ])
# Every height
print(people[, "height"])
# Column names can be replaced after creation with "colnames"
colnames(people) <- c(
  "Patient_ID", "Name", "Age", "Height", "Gender", "Blood_Type"
)
print(people)
## ---- echo=T,eval=T------------------------------------------------------
# The $ operator extracts one column of a data frame by name.
# The Patient_ID column of people:
print(people$Patient_ID)
# The third element of the Gender column
print(people$Gender[3])
## ---- echo=T,eval=T------------------------------------------------------
# Arithmetic on vectors is carried out element by element:
# each age is multiplied by itself.
age_squared <- age * age
print(age_squared)
# A single number is applied to every element of a vector,
# e.g. to convert heights from millimetres to metres
heights_mm <- c(1770, 1600, 1890, 1720, 1660)
heights_m <- heights_mm * 0.001
print(heights_m)
# Vectors of equal length can be combined element-wise, which is
# handy for calculations that involve several variables
# HDL cholesterol in mmol/L
HDLchol <- c(3.30, 3.98, 3.19, 3.18, 3.44)
# LDL cholesterol in mmol/L
LDLchol <- c(0.5, 0.7, 0.55, 1.8, 1.6)
total_chol <- LDLchol + HDLchol
print(total_chol)
## ---- echo=T,eval=T------------------------------------------------------
# Import data from a csv file. Note that you need to either type the
# full file path, or the file path relative to the current working
# directory. `header = TRUE` treats the first line of each file as the
# column names.
cd4_data <- read.csv(file = "data/cd4.csv", header = TRUE)
melanoma_data <- read.csv(file = "data/melanoma.csv", header = TRUE)
# Hint: you can check the working directory with the console using:
getwd()
# You can also change the working directory using setwd(). The folder
# below is specific to the original author's computer, so the change is
# only made when that folder actually exists; calling setwd() on a missing
# folder would stop the script with an error. Point `data_dir` at a folder
# on your own computer to try it out.
data_dir <- "~/Dropbox/Teaching/R practicals/Intro Material/data"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# You can also see what files are in your current directory using:
list.files()
## ---- echo=T,eval=T------------------------------------------------------
# Open a window to view the data. View() opens a data viewer, which only
# makes sense in an interactive session (e.g. the RStudio console), so it
# is skipped when the script is run non-interactively, where it could
# otherwise fail.
if (interactive()) {
  View(cd4_data)
}
# View the dimensions of the data: number of rows, then number of columns
nrow(cd4_data)
ncol(cd4_data)
# Have a look at the column variable names of the data
colnames(cd4_data)
# Check the data type of a specific column (here the one named "baseline")
class(cd4_data[, "baseline"])
## ---- echo=T-------------------------------------------------------------
# Export the `fellowship` data frame to a csv file; the relative file
# name means it is written to the current working directory.
write.csv(x = fellowship, file = "fellowship_2.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment