Skip to content

Instantly share code, notes, and snippets.

@aurora-mareviv
Last active May 30, 2018 17:38
Show Gist options
  • Save aurora-mareviv/7448898 to your computer and use it in GitHub Desktop.
Save aurora-mareviv/7448898 to your computer and use it in GitHub Desktop.
Small R scripts to learn R and basic statistics at the same time.
##########################################
### COMMANDS TO START WITH R CONSOLE 1 ###
##########################################
# Under GNU-GPL license v.3
# First, install R: http://cran.r-project.org/
# Second, install and open R-Studio: http://www.rstudio.com/ide/download/desktop
# You can open this text file from inside R-studio, or with a basic text editor (like Notepad).
# Just copy and paste the following commands, and press ENTER to see the results.
# You can recycle these commands to make new ones suitable for your own data!
# Note: the lines of text preceded by "#" are considered comments, and they are not executed even if they are pasted.
# To open a help page, write ? followed by the name of the function.
?log
?boxplot
#############################
######### R OBJECTS #########
#############################
# R is primarily a functional language; however, it is also capable of working in an object-oriented manner.
# This means that it is able to store chunks of data (you can call them "objects") under a name.
oneobject <- 2
oneobject # typing the name of an object prints its value (here: 2)
anotherobject <- 3+6
anotherobject # prints 9
# It also overwrites an object if it is given another value
oneobject <- 6^2
oneobject # now prints 36 instead of 2
# R objects can do many useful things, saving time in long calculations
x <- 45
y <- 28*0.07
mycalc <- (log(3)+x)*5*(6+y)^2 # log() is the natural logarithm; x and y are replaced by their stored values
mycalc
#############################
##### CREATING DATABASES ####
#############################
# Any R data matrix is composed of vector-objects, each holding either numbers, text, or logical values (TRUE/FALSE). Text must be inside plain "" symbols, which are different from the curly quotation marks “ ” used by MS Windows programs.
# The "oneobject" above is a vector of length 1.
# Note: try not to name any object as "data", it can confuse R.
# Three example vectors of the same length (13 values each): numbers, text and logical values.
a <- c(1,5,6,8,7,3,4,7,9,9,3,4,2)
b <- c("blue","red","blue","yellow","orange","black","green","purple","white","pink","gray","blue","black")
c <- c(TRUE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE)
# You can know the type of vector by typing: class(myvector).
class(a) # "numeric"
class(b) # "character"
class(c) # "logical"
# We can bind vectors to construct a matrix.
# Note: a matrix can hold only one type of value, so mixing numbers, text and logical values turns everything into text.
obj <- rbind(a,b,c) # binds by row
obj
obj2 <- cbind(a,b,c) # binds by column
obj2
class(obj2)
# We can transform "obj" to a data frame in "long" format (data frames are more common than matrices).
# Unlike a matrix, a data frame keeps the type of each column.
# stringsAsFactors=TRUE stores the text column "b" as a factor (a categorical variable).
# Since R 4.0.0 the default is FALSE, which would leave "b" as plain text that cannot be drawn as a barplot with plot().
obj <- data.frame(a,b,c, stringsAsFactors=TRUE)
class(obj)
# View your database (opens a spreadsheet-style viewer).
View(obj)
# Edit your database: fix() opens an editor and stores the edited result back in "obj".
fix(obj)
# Save your database to the file "obj.Rdata" in the current working directory (it can be restored later with load("obj.Rdata")).
save(obj, file="obj.Rdata")
##############################
### DESCRIPTIVE STATISTICS ###
##############################
# Let's use real data:
# Note that var1, var2, var3, var4, var5 and var6 are typed as text (inside ""), not as numbers.
# Once a variable has been created, it can be changed to numeric with:
#   var1 <- as.numeric(var1)
# and turned into a factor with:
#   var1 <- as.factor(var1)
# (These two commands are shown as comments because var1 is only created a few lines below; running them at this point would stop with the error: object 'var1' not found.)
var0 <- c(64,76,66,47,72,82,66,58,64,69,80,57,66,63,71,55,57,71,45,77,69,61,47,55,59,45,61,56,68,74,55,71,72,64,52,62,69,53,76,62,65,54,73,79,74,38,61,73,75,71,74,52,48,45,65,59,79,78,82,61,70,73,68,76,71,65,78,65,80,69,73,70,80,50,67,70,63,68,70,37,73,68,53,73,79,76,68,80,79,78,75,82,72,77,75,66,72,66,66,58,70,71,69,81,74,45,73,81,71,76,79,80,72,64,81,79,74,69,75,56,62,60,83,55,74,47,70,59,47,81,73,59,74,70,69,81,29,77,72,78,68,60,65,83,66,76,68,80,82,79,74,74,22,62,81,75,75,44,72,75,72,77,76,67,70,76,42,56,76,84,75,76,83,73,63,50,78,70,79,74,59,54,74,68,72,82,69,48,64,48,78,62,87,79,65,70,85,80,79,72,71,77,76,79,76,81,82,70,65,79,76,83,75,63,52,80,86,63,68,69,78,82,74,86,69,85,72,64,57,74,78,22,77,64,63,51,74,71,71,47,63,66,73,43,72,75,81,40,56,71,77,77,71,69,68,58,64,76,73,81,73,81,59,68,57,62,66,76,66,47,72,82,66,58,64,69,80,57,66,63,71,55,57,71,45,77,69,61,47,55,59,45,61,56,68,74,52,71,72,64,52,62,69,53,76,62,65,54,73,79,74,38,61,73,75,71,75,82,72,77,75,66,72,66,66,58,81,74,45,73,81,71,76,79,80,72,64,81,
79,74,69,75,56,62,60,83,55,74,47,70,59,47,81,73,59,74,70,69,81,29,77,72,78,68,60,65,83,66,76,68,80,82,79,74,74,22,62,81,75,75,44,72,75,72,77,76,67,70,76,42,56,76,84,75,76,83,73,63)
var1 <- c("1","2","1","2","1","1","1","1","1","1","1","1","1","1","1","2","2","1","1","1","2","1","2","1","1","1","1","1","1","1","2","2","1","2","1","1","1","1","1","1","2","1","1","1","1","1","1","2","2","1","2","1","2","2","1","1","1","2","1","2","2","2","2","2","1","1","1","1","1","1","1","1","2","1","1","1","2","1","1","1","1","2","1","1","1","1","1","2","1","2","1","1","1","1","1","1","1","2","1","1","1","2","1","1","2","1","1","2","2","2","1","2","1","2","1","1","1","1","2","2","2","2","2","1","1","1","1","2","2","1","1","1","1","1","1","2","1","1","2","1","2","1","1","1","1","1","2","1","1","1","1","1","2","1","1","2","1","2","1","1","1","1","1","2","1","2","2","1","2","2","1","1","1","1","1","2","2","1","1","1","2","1","2","1","1","1","1","1","1","1","1","1","1","2","1","1","1","2","2","1","2","1","1","1","1","2","1","1","1","1","1","1","2","2","1","1","1","1","2","1","1","1","1","2","1","2","1","1","2","1","1","1","1","1","2","2","1","1","1","1","1","1","1","1","1","2","1","2","1","1","1","2","1",
"2","2","1","1","1","1","1","1","1","1","1","1","2","1","2","1","1","1","1","2","1","1","1","2","1","2","2","1","1","1","1","2","1","1","1","1","1","1","1","1","1","1","1","2","1","1","1","2","1","1","1","2","2","1","1","2","2","1","1","1","2","1","1","1","1","1","2","1","1","1","1","1","2","1","2","2","1","1","1","1","1","2","1","2","1","1","1","1","1","1","2","1","2","1","1","1","1","1","1","2","2","1","1","2","1","2","2","1","2","2","1","2","2","2","1","2","1","2","2","1","1","1","1","1","1","1","1","1","2","2","1","1","2","2","2","1","2","2","2","2","1","2","1","1","2")
var2 <- c("0","1","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","1","0","1","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","0","0","1","1","0","0","0","0","1","0","1","1","0","0","0","0","0","0","0","0","0","1","0","0","1","0","0","1","1","0","0","0","0","1","1","1","1","0","0","1","0","0","0","0","1","0","1","0","0","0","0","0","0","0","1","0","1","0","0","1","1","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","0","0","1","1","0","0","0","0","1","0","1","0","0","0","0","1","0","1","1","0","0","0","0","0","1","0","0","0","1","0","0","0","0","0","0","1","0","1","0","0","0","0","0","1","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","1","1","1","0","0","1","0","1","0","1","0","0","1","0","0","0","1","0","0","1","1","0","0","0","0","1","0","1","1","0","1","1","1","0","0","0","0","0","1","0","1","0","0","0","0","1","0","0",
"0","0","0","0","0","1","0","0","0","1","0","1","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","1","1","0","1","0","0","1","0","0","0","0","0","0","0","0","1","0","0","0","1","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0")
var3 <- c("0","0","0","0","0","1","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","1","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","1","0","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","1","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","1","0","0","0","0","1","0","1","0","0","0","1","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","1","0","0","1","0","0","0","0","0","0","1","1","0","0","0","1","0","0","0","0","0","0","0","1","0","0","1","0","0","0","1","0","0","0","0","0","0","0","0","1","0","0","1","0","0","1","0","0","0","0","1","0","0","0","0","0","0","0","0","1","0","0","0","1","0","0","0","1","1","1","1","0","0","0","0","0","0","0","0","0","0",
"0","0","1","0","0","0","1","0","0","0","0","1","0","0","0","1","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","1","1","0","1","0","0","0","0","0","0","0","0","1","1","1","0","1","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","1","0","0","1","0","0","0","0","1","0","1","0","1","0","0","0","1","0","0","0","0","0","0","0","0","0","1","1","0","0","0","1","0","0","0","0","0","0","1","0","1","0","1","1","1","1","0","0","0","0","1","0","0","1","0","0","0","1","1","0","0","0","0","0","0","0","0","0","1","1","0","0","0","1","0","0","0","1","0")
var4 <- c("0","1","0","0","0","0","1","0","1","1","0","1","0","1","1","0","0","0","1","0","0","0","0","0","0","0","1","1","0","0","1","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","1","1","0","1","0","1","1","1","0","1","0","0","0","0","0","0","0","1","0","1","0","0","0","0","0","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","1","1","1","1","0","0","0","1","0","0","0","0","0","0","1","0","0","0","1","1","0","0","1","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","1","1","1","0","0","0","1","0","1","1","1","1","0","1","1","1","1","1","1","1","0","1","1","1","1","1","1","1","1","1","1","1","0","1","1","1","1","1","1","0","0","0","1","0","1","0","0","0","1","1","0","0","1","0","1","0","1","0","0","1","1","1","1","1","0","0","0","0","0","0","1","0","1","1","0","1","1","0","1","0","1","0","1","0","0","0","0","1","1","1","1","0","0","1","1","1","1","1","0","1","0","1","1","1","1","0","0","1","1","1","1","1","1","1","1","1","0","1","1","1","1","1",
"0","1","1","1","0","1","0","0","1","1","1","1","0","0","1","1","0","0","1","1","0","0","1","0","1","1","0","0","1","0","0","0","0","0","0","0","1","0","1","0","0","0","0","0","1","1","0","0","0","1","0","1","0","0","0","1","0","0","1","1","1","1","1","0","1","1","1","0","1","1","1","0","0","1","1","1","1","1","0","1","1","1","0","1","1","0","0","0","0","0","1","0","1","0","1","1","1","1","0","1","1","0","1","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","1","1","1","0","0","0","0","1","1","1","1","1","1","1","1","0","1","1","1","1","0","1","0")
var5 <- c("0","1","1","0","0","1","0","0","0","0","0","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","1","0","1","0","0","0","0","0","1","0","0","0","0","0","1","0","0","0","0","0","1","0","0","0","0","1","0","0","1","0","0","1","0","0","1","1","0","0","0","0","0","1","0","0","1","0","1","1","1","0","0","0","0","0","0","0","0","1","0","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","1","1","0","1","1","0","1","1","1","1","1","1","0","1","1","0","0","1","1","0","1","0","0","0","1","0","1","1","1","0","0","0","1","0","0","0","0","0","0","1","0","0","0","0","1","0","1","0","0","1","1","0","0","1","1","1","0","0","0","0","0","0","1","1","0","1","1","1","1","0","1","0","1","1","0","0","0","0","1","1","1","1","0","1","1","0","0","1","0","1","1","0","1","1","0","1","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1",
"0","1","1","1","0","1","0","0","1","0","1","0","0","0","1","1","0","0","1","0","0","0","1","0","0","1","0","0","0","0","0","1","0","0","0","0","0","1","0","1","0","0","0","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","1","1","0","1","1","1","1","0","1","1","0","1","0","0","1","1","1","1","1","1","0","1","1","0","1","1","1","0","0","0","0","0","1","1","1","0","1","1","0","0","0","1","0","0","1","0","0","0","1","1","1","1","1","1","1","0","1","0","1","1","0","1","1","0","1","1","1","0","0","0","0","1","1","1","1","0","1","1","1","0","1","1","1","1","1","1","0")
var6 <- c("1","1","0","0","1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","1","0","0","1","1","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","0","0","0","0","0","0","1","0","0","0","1","0","0","1","1","1","0","0","0","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0",
"1","1","1","1","0","0","1","1","0","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","0","0","1","1","1","1","0","0","0","0","1","0","0","0","1","1","1","1","1","1","0","1","0","0","0","1","1","0","0","1","1","0","1","1","1","1","1","1","1","1","0","0","0","1","1","1","0","1","1","1","1","0","1","0","0","1","1","1","1","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0")
# Join the vectors into a data frame.
# stringsAsFactors=TRUE stores the text variables var1 ... var6 as factors (categorical variables).
# Since R 4.0.0 the default is FALSE, which would leave them as plain text and summary() would not count their categories.
mydata <- data.frame(var0,var1,var2,var3,var4,var5,var6, stringsAsFactors=TRUE) # joins the vectors into a data frame
mydata
View(mydata)
fix(mydata)
# Global summary of my data.
summary(mydata)
# Summary of any sub-object inside the bigger object "mydata": summary(obj$my.variable.name)
summary(mydata$var0)
mean(mydata$var0) # mean
sd(mydata$var0) # standard deviation
summary(mydata$var3) # for a factor, summary() counts the cases in each category
# The variable names of my data (important if R has changed them after importing from other data formats)
names(mydata)
# To change var1 to numeric, just type:
# (as.character() goes first: as.numeric() applied directly to a factor returns the internal position of each level (1, 2, 3...), not the values written in the labels)
mydata$var1 <- as.numeric(as.character(mydata$var1))
# To return to factor, type:
mydata$var1 <- as.factor(mydata$var1)
########### PLOTS ############
# if the covariate is continuous, the histogram is adequate:
hist(obj$a)
hist(mydata$var0) # a very basic histogram
hist(mydata$var0, breaks=30) # breaks=30 suggests splitting the range into about 30 bars
?hist # help page on the "hist" function
# for categorical covariates (for example YES/NO covariates), we have the barplot:
# plot() only draws a barplot when it receives a factor, so factor() is applied first (a plain text variable would produce an error instead of a plot).
plot(factor(obj$b))
plot(factor(mydata$var1))
# Bivariate plots (dichotomic versus numerical): boxplots
# The formula "var0 ~ var3" reads: the numeric variable var0, split by the groups of var3.
boxplot(var0 ~ var3, data = mydata, col = "lightgray")
boxplot(var0 ~ var2, data = mydata, col = "blue")
##############################
#### ANALYTIC STATISTICS #####
##############################
# First, check whether our numeric variable follows a normal distribution (if it does not, non-parametric tests are preferred)
# SHAPIRO-WILK's test for normality
shapiro.test(mydata$var0) # if the p-value is < 0.05, the variable is not normal. A p-value of 7.342e-14 is scientific notation for 7.342 * 10^-14, that is: 7.342 * 0.00000000000001
# Plots to represent normality: q-q plots
qqnorm(mydata$var0)
qqline(mydata$var0) # adds a reference line to the plot drawn by qqnorm()
library(car) # an additional library to load before the next command (if it is not installed yet, run install.packages("car") once)
qqPlot(mydata$var0)
# LEVENE's test of equality of variances (homoscedasticity), comparing the variance of var0 between the groups of var1:
tapply(mydata$var0, mydata$var1, var, na.rm=TRUE) # variance of var0 inside each group of var1
leveneTest(mydata$var0, mydata$var1, center=median) # if the p-value [Pr(>F)] is < 0.05, the variable is not homoscedastic.
# CHI-SQUARE
# analyses two dichotomic covariates (like a bivariate bar plot)
mytab <- xtabs(~var1 + var2, data=mydata) # contingency table: counts of var1 (rows) by var2 (columns)
mytab
mytest <- chisq.test(mytab, correct=FALSE) # correct=FALSE switches off the continuity correction for 2x2 tables
mytest # p-value 0.6488 (> 0.05): no evidence of a relationship between the incidence of infections and diabetes in our patients (assuming var1 and var2 code these two conditions)
fisher.test(mytab) # we need to do Fisher's exact test instead of Chi-square if any of the cells has an expected count <5 (the expected counts are stored in mytest$expected)
# WILCOXON TEST
# the equivalent of Student's t-test for non-parametric covariates (like a bivariate histogram or boxplot)
# analyses a dichotomic factor vs. a numeric variable
tapply(mydata$var0, mydata$var3, median, na.rm=TRUE) # median of var0 inside each group of var3
wilcox.test(var0 ~ var3, alternative="two.sided", data=mydata)
# LOGISTIC REGRESSION
# models the dichotomic outcome var2 as a function of var0 and var1
g1 <- glm(var2 ~ var0 + var1, family=binomial(logit), data=mydata)
summary(g1)
# R COMMANDER
# I generally do not recommend working with the RCommander interface, as it can be pretty limiting, especially when trying to develop your own variations of a given command.
# For the purpose of writing, RStudio is much easier and you can learn R much faster, focusing on what's important!
# If you still want to try RCommander, just type:
## Install R Commander:
# install.packages("Rcmdr")
## Execute R commander for first time in a session.
# library(Rcmdr)
## To execute Rcmdr for second or more times (after being closed in a session):
# Commander()
##########################################
### COMMANDS TO START WITH R CONSOLE 2 ###
##########################################
# Under GNU-GPL license v.3
# First, install R: http://cran.r-project.org/
# Second, install and open R-Studio: http://www.rstudio.com/ide/download/desktop
# You can open this text file from inside R-studio, or with a basic text editor (like Notepad).
# Just copy and paste the following commands, and press ENTER to see the results.
# You can recycle these commands to make new ones suitable for your own data!
# Note: the lines of text preceded by "#" are considered comments, and they are not executed even if they are pasted.
# To open a help page, write ? followed by the name of the function.
?log
?boxplot
#############################
### IMPORT - EXPORT DATA ####
#############################
# We will use different packages (libraries) to import data.
# Important: read this link: http://cran.r-project.org/doc/manuals/r-release/R-data.html
# Note: you will have to choose your working directory (e.g. a folder named "My data")
# Then you need to change the working directory of R to that folder. For example:
setwd("C:/Users/Me/My Documents/.../My data")
####### 1. SPSS DATA ########
# We need to have our data in the .sav format, and store it in our folder.
# The following commands import data from SPSS to R format.
library(foreign)
# TRUE is written in full: the shortcut T is an ordinary object that can be overwritten by accident.
# use.value.labels=TRUE turns variables with SPSS value labels into factors; to.data.frame=TRUE returns a data frame instead of a list.
mydata <- read.spss("somedata.sav", use.value.labels=TRUE, to.data.frame=TRUE)
?read.spss # help page about the function "read.spss" and its arguments/options
save(mydata, file="mydata.RData")
# You will need to review the data imported to make sure there are no inconsistencies.
# Some errors can be fixed in the SPSS file before importing.
names(mydata)
summary(mydata)
View(mydata)
# fix(mydata) # opens data for direct editing, not recommended
# To change var1 from numeric to factor, just type:
mydata$var1 <- as.factor(mydata$var1)
# To return to numeric, type:
# (as.character() goes first: as.numeric() applied directly to a factor returns the internal position of each level (1, 2, 3...) instead of the original values)
mydata$var1 <- as.numeric(as.character(mydata$var1))
# Save (overwrite) the corrected file
save(mydata, file="mydata.RData")
###### 2. EXCEL DATA ########
# In .XLS format (Excel 2003)
# Option 1: use readxl (updated! this is my favourite option now)
library("readxl")
rutaxl <- "C:/Users/Me/My Documents/.../My data/somedata.xls"
mydata <- read_excel(rutaxl, sheet = 1)
# the second argument (aka "sheet = 1") imports the first sheet in the Excel file.
# to import the second sheet use "sheet = 2" and so on
# Once the data are imported, you will need to review them to make sure there are no inconsistencies.
# (These review commands come after the import, so that they inspect the Excel data and not a data set loaded earlier.)
# Some errors can be fixed in the Excel file before importing.
names(mydata)
summary(mydata)
View(mydata)
fix(mydata)
# To change var1 from numeric to factor, just type:
mydata$var1 <- as.factor(mydata$var1)
# To return to numeric, type:
# (as.character() goes first: as.numeric() applied directly to a factor returns the internal position of each level (1, 2, 3...) instead of the original values)
mydata$var1 <- as.numeric(as.character(mydata$var1))
# Option 2: use gdata (can give errors if Perl modules are missing)
library(gdata)
# The path is kept in its own object, so "mydata" only ever holds data: if the import fails, mydata is not left containing a piece of text.
rutaxl <- "C:/Users/Me/My Documents/.../My data/somedata.xls"
mydata <- read.xls(rutaxl, sheet="name of my sheet")
names(mydata)
summary(mydata)
View(mydata)
fix(mydata)
# Save the corrected file
save(mydata, file="mydata.RData")
# Option 3: XLConnect (can give errors if Java modules are missing)
library(XLConnect, pos=4)
# The workbook is loaded into a temporary object, the sheet named "Sheet1" is read from it, and the temporary object is removed afterwards.
.Workbook <- loadWorkbook("C:/Users/Me/My Documents/.../My data/somedata.xls")
mydata <- readWorksheet(.Workbook, "Sheet1")
remove(.Workbook)
names(mydata)
summary(mydata)
View(mydata)
fix(mydata)
# Save the corrected file
save(mydata, file="mydata.RData")
# Option 4: use R Commander, from the Menu>>Data>>Import Data
# Other options available: http://cran.r-project.org/doc/manuals/r-release/R-data.html
# In .XLSX format (Excel 2007-2010)
# Option 1: use readxl (updated! this is my favourite option now)
library("readxl")
rutaxl <- "C:/Users/Me/My Documents/.../My data/somedata.xlsx"
mydata <- read_excel(rutaxl, sheet = 1)
# the second argument (aka "sheet = 1") imports the first sheet in the Excel file.
# to import the second sheet use "sheet = 2" and so on
# Option 2: use gdata (can give errors if Perl modules are missing)
library(gdata)
# The path is kept in its own object, so "mydata" only ever holds data: if the import fails, mydata is not left containing a piece of text.
rutaxl <- "C:/Users/Me/My Documents/.../My data/somedata.xlsx"
mydata <- read.xls(rutaxl, sheet="name of my sheet")
names(mydata)
summary(mydata)
View(mydata)
fix(mydata)
# Save the corrected file
save(mydata, file="mydata.RData")
# Option 3: XLConnect (can give errors if Java modules are missing)
library(XLConnect, pos=4)
# The workbook is loaded into a temporary object, the sheet named "Sheet1" is read from it, and the temporary object is removed afterwards.
.Workbook <- loadWorkbook("C:/Users/Me/My Documents/.../My data/somedata.xlsx")
mydata <- readWorksheet(.Workbook, "Sheet1")
remove(.Workbook)
names(mydata)
summary(mydata)
View(mydata)
fix(mydata)
# Save the corrected file
save(mydata, file="mydata.RData")
# Other options available: http://cran.r-project.org/doc/manuals/r-release/R-data.html
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment