Last active
December 15, 2016 09:48
-
-
Save n8thangreen/03a70529b55543da4b3498bba3ee2e31 to your computer and use it in GitHub Desktop.
# Imperial College London MSc Public Health practical
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ----setup, include=FALSE------------------------------------------------
# Set the knitr chunk default so that the source code of every chunk is
# echoed in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
## ---- echo=T, eval=T-----------------------------------------------------
# An expression typed on its own is evaluated and its value is printed.
2 + 2
## ---- echo=T, eval=T-----------------------------------------------------
# `<-` stores the value in an object instead of printing it.
a <- 2 + 2
## ---- echo=T,eval=T------------------------------------------------------
# Objects can hold numbers or text, and can be used in later calculations.
b <- 2.5
hobbit_home <- "hobbiton"
# NOTE: an object called `c` can coexist with the base function `c()`;
# R still finds the function whenever `c(...)` is called.
c <- 3 * 5
example_sum <- b + c
# Typing an object's name prints its current value.
hobbit_home
example_sum
## ---- echo=T,eval=T------------------------------------------------------
# Assigning to an existing name overwrites the old value, and the new
# value may be of a different type (here a number becomes text).
var <- 3 * 5
var
var <- 20
var
var <- "wheredidmynumbersgo"
var
## ---- echo=T, eval=T-----------------------------------------------------
# Functions take arguments in parentheses; an argument can be an object
# or a literal value.
a <- 16
result1 <- sqrt(a)
result2 <- sqrt(25)
result1
result2
## ---- echo=T,eval=T------------------------------------------------------
# Since we did not specify an object name to save the result to, R will
# print out the result directly.
rnorm(n = 10, mean = 5, sd = 2)
## ---- echo=T,eval=T------------------------------------------------------
# This is a comment
# This code declares two numbers and adds them together
a <- 4
b <- 5 # Comments can start partway through a line
a + b
# This bit of code will not run, as it is commented
# c <- 6*6
## ----echo=T,eval=T-------------------------------------------------------
# print() displays the value of an object explicitly.
a <- "letshavealookthen"
print(a)
## ----echo=T,eval=T-------------------------------------------------------
# A function call may be split over several lines; R keeps reading until
# the closing parenthesis.
rnorm(
  10,
  2,
  1
)
## ---- echo=T,eval=T------------------------------------------------------
# Logical (boolean) values are written TRUE and FALSE.
x <- TRUE
y <- FALSE
## ---- echo=T,eval=T------------------------------------------------------
# A vector of whole numbers (R stores these as numeric/double unless they
# are written with an L suffix, e.g. 140L)
systolic_bp <- c(140, 120, 100, 90, 110)
print(systolic_bp)
# A vector of numerics
blood_conc <- c(0.1, 0.001, 0.0005, 0.03, 0.6)
print(blood_conc)
# A way to create vectors of sequential integers
person_no <- 1:5
print(person_no)
# A vector of characters
person_name <- c("frodo", "samwise", "meriadoc", "peregrin", "aragorn")
print(person_name)
## ----echo=T,eval=T-------------------------------------------------------
# Look at two vectors side by side using cbind
cbind(systolic_bp, blood_conc)
# Look at two vectors as rows using rbind
rbind(systolic_bp, blood_conc)
## ---- echo=T,eval=T------------------------------------------------------
# Print the first and third element of the vector
print(systolic_bp[1])
print(systolic_bp[3])
# Change the fourth element of the person_name vector
person_name[4] <- "fool of a took"
## ---- echo=T,eval=T------------------------------------------------------
# Print elements 1 to 3
print(blood_conc[1:3])
# Print elements 1, 3 and 5
print(blood_conc[c(1, 3, 5)])
## ---- echo=T,eval=T------------------------------------------------------
# A negative index drops elements instead of selecting them.
# Print all but the second element of blood_conc
print(blood_conc[-2])
# Print all but the first and last element of blood_conc
print(blood_conc[-c(1, 5)])
## ---- echo=T,eval=T------------------------------------------------------
# Combine equal-length vectors into a data frame: each vector becomes a
# column and each position becomes a row.
age <- c(50, 38, 36, 28, 87)
fellowship <- data.frame(person_no, person_name, age, systolic_bp, blood_conc)
print(fellowship)
## ---- echo=T,eval=T------------------------------------------------------
# A vector can only hold one type. Mixing numbers with a string makes R
# convert every element to character, so the numbers become text.
wrong <- c(1, 1.325, 21, "sneaky")
print(wrong)
## ---- echo=T, eval=T-----------------------------------------------------
# Generate fake data
age <- c(25, 44, 17, 30, 61)
height <- c(170, 180, 150, 145, 188)
person_name <- c("brock", "surge", "misty", "erika", "koga")
# Declare a vector of characters to represent gender and blood type,
# and convert them to factors. `levels` lists every allowed category,
# including ones that do not occur in the data ("Other", "O").
gender <- factor(c("M", "M", "F", "F", "M"), levels = c("M", "F", "Other"))
blood_type <- factor(
  c("A", "B", "A", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
# `stringsAsFactors = FALSE` keeps the person_name vector as character
# instead of converting it to a factor. This has been the default since
# R 4.0.0; passing it explicitly gives the same result on older versions.
people <- data.frame(
  person_no, person_name, age, height, gender, blood_type,
  stringsAsFactors = FALSE
)
print(people)
print(levels(blood_type))
## ---- echo=T,eval=T------------------------------------------------------
# Get the age of the first person. Note the comma between the row index
# and column name
print(people[1, "age"])
# Use vector slicing to get subsets of data
print(people[1:3, "blood_type"])
print(people[4, c("gender", "blood_type")])
# You can omit either the row id or column name, and R will give you the
# entire column/row
# Print all data for the third person
print(people[3, ])
# Print all heights
print(people[, "height"])
# You can change column names after creation using "colnames"
colnames(people) <- c(
  "Patient_ID", "Name", "Age", "Height", "Gender", "Blood_Type"
)
print(people)
## ---- echo=T,eval=T------------------------------------------------------
# `$` extracts a single column of a data frame by name.
# Print the Patient_ID variable from the people data frame
print(people$Patient_ID)
# Print the 3rd element of the Gender variable
print(people$Gender[3])
## ---- echo=T,eval=T------------------------------------------------------
# Arithmetic on vectors is element-wise: each element of `age` is
# multiplied by the element in the same position.
age_squared <- age * age
print(age_squared)
# You can also perform the same calculation for each element of a vector
# eg. the following example changes the unit of measurement
heights_mm <- c(1770, 1600, 1890, 1720, 1660)
heights_m <- heights_mm * 0.001
print(heights_m)
# Performing arithmetic on vectors can be useful for carrying
# out calculations using multiple variables
# LDL cholesterol in mmol/L
LDLchol <- c(0.5, 0.7, 0.55, 1.8, 1.6)
# HDL cholesterol in mmol/L
HDLchol <- c(3.30, 3.98, 3.19, 3.18, 3.44)
# Element-wise sum of the two vectors
total_chol <- LDLchol + HDLchol
print(total_chol)
## ---- echo=T,eval=T------------------------------------------------------
# Import data from a csv file. Note that you need to either type the
# full file path, or the file path relative to the current working
# directory
cd4_data <- read.csv(file = "data/cd4.csv", header = TRUE)
melanoma_data <- read.csv(file = "data/melanoma.csv", header = TRUE)
# Hint: you can check the working directory with the console using:
getwd()
# You can also change the working directory using setwd(). The path below
# is specific to one machine, so the change is only attempted when that
# directory exists; otherwise setwd() would stop the script with an error.
data_dir <- "~/Dropbox/Teaching/R practicals/Intro Material/data"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Directory not found, working directory unchanged: ", data_dir)
}
# You can also see what files are in your current directory using:
list.files()
## ---- echo=T,eval=T------------------------------------------------------
# Open a window to view the data. View() needs an interactive session, so
# it is skipped when the script is run in batch mode.
if (interactive()) {
  View(cd4_data)
}
# View the dimensions of the data
nrow(cd4_data)
ncol(cd4_data)
# Have a look at the column variable names of the data
colnames(cd4_data)
# Check the data type of specific columns
class(cd4_data[, "baseline"])
## ---- echo=T-------------------------------------------------------------
# Save the fellowship data frame as a csv file in the current working
# directory. By default write.csv() also writes the row names as the
# first column.
write.csv(fellowship, file = "fellowship_2.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment