nickpettican · June 15, 2016 15:14
diff --git a/stats01.R b/stats01.R
 # CHAPTER 1

 # FUNDAMENTALS

 # The response variable is the things you are working on, the variable whose variation you are attempting to understand.
 # This is the variable that goes in the "y" axis of the graph (the ordinate). 
 # The explanatory variable goes on the "x" axis of the graph (the abscissa).

 # The explanatory variables:
 # - Regression: all explanatory variables continuous.
 # - ANOVA: all explanatory variables categorical.
 # - ANCOVA: some explanatory variables continuous some categorical.

 # The response variable:
 # - Regression, ANOVA or ANCOVA: continuous.
 # - Logistic regression: proportion.
 # - Log linear models: count.
 # - Binary logistic analysis: binary.
 # - Survival analysis: time at death.

 # EVERYTHING VARIES:

 # Heterogeneity is universal, so we need a way of discriminating between variation that is scientifically interesting
 # and variation that just reflects background heterogeneity.

 worms <- read.csv("C:\\MSc\\Statistics\\worms.csv")
 # opens the file worms.csv

 attach(worms)
 # takes a "database" 

 names(worms)
 # prints the name of the columns

 # subscripts in R appear within scare brackets, thus y[7] is the 7th element of the vector "y"
 # z[2,6] it is the second row of the 6th column.
 # [, means all the rows
 # ,] means all the columns

 worms[,1:3]
 # to view the first, second and third column.

 worms[5:15,]
 # to view the middle 11 rows of all the columns of the dataframe

 worms[Area>3&Slope<3,]
 # this selects onyl the rows which have Area > 3 and Slope <3 using ']

 worms[order(Area),]
 # this sorts the variable "Area" in ascending order

 worms[rev(order(worms[,5])),c(5,7)]
 # this sorts the 5th and 7th column into descending order

 # SUMMARISING THE CONTENTS OF A DATAFRAME

 summary(worms)

 with(worms,tapply(Worm.density,Vegetation,mean))
 # to find out the mean number under different variables
 # for a single response variable you can use "tapply" and "with"

 aggregate(worms[,c(2,3,5,7)], list(Vegetation),mean)
 # "aggregate" calculates the mean values of all the continuous variables summarised at the same time
 # "aggregate" assigns the word "Group.1" to the "Vegetation" column as a default
 aggregate(worms[,c(2,3,5,7)], list(Communit=Vegetation),mean)
 # this assigns the word "Community to the column instead

 aggregate(worms[,c(2,3,5,7)], list(Moisture=Damp,Community=Vegetation),mean)
 # you can do multiple classifications using two or more categorical explanatory variables

 with(worms,tapply(Slope,list(Damp,Vegetation),mean))
 # tapply operator produces NA for missing combinations

 # GET TO KNOW YOUR DATA

 head(data)
 # prints the first 6 lines of the column

 plot(y)
 # produces a scatterplot with the values of "y"

 which(y>10)
 # prints the number of the line that has a value of >10

 y[50]<-2.179386
 # assigns the value 2.179386 to y[50]

 table(treatment)
 # this checks the factor levels of the variable "treatment"

 # in this case we found a mistake in one of the entries
 which(treatment=="nitogen")
 # this finds the mistake: in this case "nitogen"
 treatment[11]<-"nitrogen"
 # this fixes the mistake, attributing the word "nitrogen" to the entry. 
 # Note that words need to be in brackets ""

 # to save the file 
 write(x, file = "data",
      ncolumns = if(is.character(x)) 1 else 5,
      append = FALSE, sep = " ")
 # it should work just by typing:
 write.csv(name_of_the_file, file = "new_name.csv")

 # when saving files it is worth setting a directory to save the files

 getwd()
 # this prints the current working directory

 setwd("C:\\MSc\\Statistics\\Satistics")
 # this sets the current directory
 # note that "C:\\" must be in capitols

 detach(data) 
 # this stops R from using the file "data" again

 rm(data)
 # this operator removes "data" from R

 # RELATIONSHIPS

 plot(x,y,pch=21,bg="red")
 # the response variable is "y" and the explanatory variable is "x" 
 # this plots a graph with red spots as the values in the entries
 # pch stands for plotting character, pch=21 gives a black edge and bright colour fill
 # bg is background = red

 plot(factor(month),upper)
 # this produces a box-and-whisker plot which is useful for error-checking
 # R produces this when the operator 'plot' is followed by a categorical explanatory variable ('factor(month)')

 # LOOKING FOR INTERACTIONS BETWEEN CONTINUOUS VARIABLES

 # we now have a table with three columns: x, y and z

 par(mfrow=c(1,2))
 # this alters the graphics parameter to specify two sets of axis on the same row

 plot(x,y)
 plot(z,y)
 # now these two graphs are side by side

 windows(7,7)
 # windows operator changes the shape of the plotting window
 # this command is not needed for RStudio

 coplot(y~x|z,pch=16,panel=panel.smooth)
 # coplot plots y against x conditional of (or given) the value of z
 # "|" is read "given"
 # this splits the data into six graphs
 # pch=16 is for solid black plotting symbols
 # panel=panel.smooth is to fit a trend line through the scatterplot in red
 # it also produces shaded horizontal bars (shingles) which overlap as they share data points 

 # INTERACTIONS INVOLVING CATEGORICAL VARIABLES

 par(mfrow=c(1,2))
 # alters the graphics parameter to specify two sets of axis on the same row

 plot(nitrogen,yield,main="Nitrog")
 plot(phosphorus,yield,main="Phosph")
 # plots show the 'main effects' of nitrogen and phosphorous in separate graphs

 tapply(yield,list(nitrogen,phosphorus),mean)
 # 'tapply' summarises variables
 # useful for breaking up a vector into groups defined by some classifying factor, compute a factor on the subsets and returning the result in a convenient form

 barplot(tapply(yield,list(nitrogen,phosphorus),mean),beside = TRUE,xlab = "phosphorous")
 # barplot is used to output the interactions calculated by tapply graphically
 # xlab is similar to 'main', it adds labels to the plot

 legend(locator(1),legend = c("no","yes"),title="nitrogen",fill=c("black","lightgrey"))
 # 'locator' allows to put the legend in a place where it does no interfere with any of the bars
 # click to add the legend

 # APPENDIX, ESSENTIALS OF THE R LANGUAGE

 # R AS A CALCULATOR

 log(42/7.3)

 log(16,2)

 5+6+3+6+4+2+4+8+3+2+7

 2+3; 5*7; 3-7

 8^(1/3)

 abs(5.7-6.8)/0.38

 options(digits=3)
 # controls the number of digits printed

 # BUILT-IN FUNCTIONS

 exp(1)
 # antilog function of the base e

 pi

 sin(pi/2)

 cos(pi/2)

 # MODULO AND INTERGER QUOTIENTS

 119%/%13
 # integer

 119%%13
 # modulo: whats left over 
 # modulo can be used to test whether numbers are odd or even

 15421%%7==0
 # modulo can also be used to find out whether one number is an exact multiple of another

 # ASSIGNMENT

 x<-5 

 # ROUNDING

 floor(5.7)
 ceiling(5.7)

 rounded<-function(x)floor(x+0.5)
 rounded(5.7)

 # INFINITY AND THINGS THAT ARE NOT A NUMBER

 3/0
 # Inf

 -12/0
 # -Inf

 exp(-Inf)
 # 0

 0/Inf
 # 0

 (0:3)^Inf
 # 0:3 generates a series (0,1,2,3) 
 # It produces a vector of answers 0 1 Inf Inf

 0/0
 # NaN (Not a Number)

 is.finite(10)
 # TRUE

 is.infinite(10)
 # FALSE

 # MISSING VALUES (NA)

 x<-c(1:8,NA)
 mean(x)
 # this will come up NA
 # the NA needs to be removed

 mean(x,na.rm=T)
 # this removes NA

 (vmv<-C(1:6,NA,NA,9:12))
 # attribute values to 'vmv'

 seq(along=vmv)[is.na(vmv)]
 # 'is.na(x)' locates the missing values whithin a vector

 which(is.na(vmv)
 # same result as before

 (vmv[is.na(vmv)]<-0)
 # this attributes '0' to the NA values

 vmv<-c(1:6,NA,NA,9:12)
 ifelse(is.na(vmv),0,vmv)
 # 'ifelse' does the same

 # CREATING A VECTOR

 y<-10:16
 y<-(10,11,12,13,14,15,16)
 y<-scan()
 # 'scan' manual input through keyboard

 (counts<-c(25,12,7,4,6,2,1,0,2))
 names(counts)<-0:8

 st<-table(rpois(2000,2.3))
 as.vector(st)
 # this removes the names

 data<-read.csv("c=\\MSc\\Stats\\daphnia.csv")
 attach(data)
 names(data)
 tapply(Growth.rate,Detergent,mean)
 tapply(Growth.rate,list(Water,Daphnia),median)

 x<-0:10
 sum(x)
 sum(x<5)
 x<5
 1*(x<5)
 x*(x<5)
 sum(x*(x<5))

 y<-c(8,3,5,7,6,6,8,9,2,3,9,4,10,4,11)
 sort(y)
 # sorts in ascending sequence

 rev(sort(y))
 # descending

 rev(sort(y))[2]
 rev(sort(y))[1:3]
 sum(rev(sort(y))[1:3])

 y
 which(y>5)
 # shows the addresses of the elements with values higher than 5
 y[y>5]

 length(y)
 length(y[y>5])
 # the number of elements in total

 x<-c(5,8,6,7,1,5,3)
 z<-x[-1]
 z

 trim.mean<-function(x)mean(sort(x)[-c(1,length(x))])
 # it first sorts the vector
 # removes the first element using x[-1] and last using x[-length(x)]
 # -c(1,length(x)) concatenates both instructions

 trim.mean(x)

 # REPEATS

 rep(9,5)
 # 99999
 rep(1:4,2)
 # 12341234
 rep(1:4,each=2)
 #11223344

 rep(1:4,each=2,times=3)
 #112233441122334411223344

 rep(c(9,15,21,83),c(4,1,4,2))
 # 999915212121218383

 # GENERATE FACTOR LEVELS

 gl(4,3)
 # 'gl' (generate levels
 # 'gl' (up to, with repeats of, to total length)

 gl(4,3,24)
 gl(4,3,20)
 gl(3,2,24,labels=c("A","B","C"))
 # AABBCCAABBCCAABBCCAABBCC
 #     Levels: ABC

 seq(0,1.5,0.2)
 # we use 'seq' when the interval is not 1.0
 # here we want to go from 0 to 1.5 in steps of 0.2

 x<-rnorm(18,10,2)
 seq(88,50,along=x)
 # 'along' can create a sequence of the same length as an existing vector 'x'

 # MATRICES

 X<-matrix(c(1,0,0,0,1,0,0,0,1),nrow=3)
 #     [,1] [,2] [,3]
 # [1,]    1    0    0
 # [2,]    0    1    0
 # [3,]    0    0    1

 class(X)
 attributes(X)

 vector<-c(1,2,3,4,4,3,2,1)
 V<-matrix(vector,byrow=T,nrow=2)
 # this allows to convert a vector into a matrix

 V
 #     [,1] [,2] [,3] [,4]
 # [1,]    1    2    3    4
 # [2,]    4    3    2    1

 dim(vector)<-c(4,2)
 is.matrix(vector)
 # this is another way to convert a vector into a matrix

 vector
 #     [,1] [,2]
 # [1,]    1    4
 # [2,]    2    3
 # [3,]    3    2
 # [4,]    4    1

 (vector<-t(vector))
 # transpose matrix

 # CHARACTER STRINGS

 phrase<- "the quick brown fox jumps over the lazy cow"
 q<-character(20)
 for(i in 1:20)q[i]<-substr(phrase,1,i)
 q
 # [1] "t"                    "th"                  
 # [3] "the"                  "the "                
 # [5] "the q"                "the qu"              
 # [7] "the qui"              "the quic"            
 # [9] "the quick"            "the quick "          
 # [11] "the quick b"          "the quick br"        
 # [13] "the quick bro"        "the quick brow"      
 # [15] "the quick brown"      "the quick brown "    
 # [17] "the quick brown f"    "the quick brown fo"  
 # [19] "the quick brown fox"  "the quick brown fox "

 strsplit(phrase,split=character(0))
 # [[1]]
 # [1] "t" "h" "e" " " "q" "u" "i" "c" "k" " " "b" "r" "o"
 # [14] "w" "n" " " "f" "o" "x" " " "j" "u" "m" "p" "s" " "
 # [27] "o" "v" "e" "r" " " "t" "h" "e" " " "l" "a" "z" "y"
 # [40] " " "c" "o" "w"

 table(strsplit(phrase,split=character(0)))
 # a b c e f h i j k l m n o p q r s t u v w x y z 
 # 8 1 1 2 3 1 2 1 1 1 1 1 1 4 1 1 2 1 2 2 1 2 1 1 1 

 words<-1+table(strsplit(phrase,split=character(0)))[1]
 toupper(phrase)
 tolower(toupper(phrase))
 # [1] "THE QUICK BROWN FOX JUMPS OVER THE LAZY COW"
 # toupper makes the characters upper case
 # tolower makes them lower case

 # ARITHMETIC MEAN OF A SINGLE SAMPLE

 arithmetic.mean<-function(x) sum(x)/length(x)

 y<-c(3,3,4,5,5)
 arithmetic.mean(y)
 mean(y)
 sort(y)[ceiling(length(y)/2)]

 med<-function(x){
 odd.even<-length(x)%%2
 if(odd.evem==0) (sort(x)[length(x)/2]+sort(x)[1+length(x)/2])/2
 else sort(x)[ceiling(length(x)/2)]
 }

 med(y)
 med(y[-1])

 # LOOPS AND REPEATS

 for(i in 1:5)print(i^2)

 binary<-function(x)
 if(x==0)return(0)
 i<-0
      string<-numeric(32)
      while(x>0)  {
            string[32-i]<-x%%2
            x<-x%/%2
            i<-i+1      }
 first<-match(1,string)
 string[first:32]

 sapply(15:17,binary)

 # THE IFELSE FUNCTION

 z<-ifelse(y<0, -1,1)

 # EVALUATING FUNCTIONS WITH APPLY

 (X<-matrix(1:24,nrow=4))
 apply(X,1,sum)
 # 'apply' is used for applying functions to the rows or columns of matrices
 # the answer produced by 'apply is a vector rather than a matrix

 apply(X,1,sqrt)

 # TESTING FOR EQUALITY

 x<-sqrt(2)
 x*x==2
 x*x-2

 # TESTING AND COERCING IN R

 as.numeric(factor(c("a","b","c")))
 as.numeric(c("a","b","c"))
 # non-numeric characters cannot be coerced into numbers

 geometric<-function(x){
 if(!is.numeric(x)) stop("Input must be numeric")
 exp(mean(log(x)))}

 geometric(c("a","b","c"))

 geometric<-function(x){
 if(!is.numeric(x)) stop("Input must be numeric")
 if(min(x)<=0) stop ("Input must be greater than zero")
 exp(mean(log(x)))}

 geometric(c(2,3,0,4))
 geometric(c(10,1000,10,1,1))

 # TIME AND DATES

 Sys.time()
 class(Sys.time())
 time.list<-as.POSIXlt(Sys.time())
 class(time.list)
 unlist(time.list)
 tim.list$wday
 time.list$yday

 Rdate<-strptime(date,"%d/%m/%Y")
 class(Rdate)

 tapply(x,Rdate$wday,mean)

 y<-paste(hrs,min,sec,sep=":")

 strptime(y,"%T")

 (Rtime<-as.difftime(y))

 tapply(Rtime,experiment,mean)

 y2<-as.POSIXlt("2018-10-22")
 y1<-as.POSIXlt("2018-10-22")

 y2-y1

 y3<-as.POSIXlt("2018-10-22 09:30:59")
 y4<-as.POSIXlt("2018-10-22 12:45:06")
 y4-y3

 difftime("2018-10-22 12:45:06","2018-10-22 09:30:59")

 y4>y3



 # UNDERSTANDIN THE STRUCTURE OF AN R OBJECT USING 'str'

 x<-runif(23)
 str(x)

 basket<-list(rep("a",4),c("b0","b1","b2"),9:4,gl(5,3))
 basket
 str(basket)

 xv<-seq(0,30)
 yv<-2+0.5*xv+rnorm(31,0,2)
 model<-lm(yv~xv+I(xv^2))
 str(model)
	# CHAPTER 1

	# FUNDAMENTALS

	# The response variable is the things you are working on, the variable whose variation you are attempting to understand.
	# This is the variable that goes in the "y" axis of the graph (the ordinate).
	# The explanatory variable goes on the "x" axis of the graph (the abscissa).

	# The explanatory variables:
	# - Regression: all explanatory variables continuous.
	# - ANOVA: all explanatory variables categorical.
	# - ANCOVA: some explanatory variables continuous some categorical.

	# The response variable:
	# - Regression, ANOVA or ANCOVA: continuous.
	# - Logistic regression: proportion.
	# - Log linear models: count.
	# - Binary logistic analysis: binary.
	# - Survival analysis: time at death.

	# EVERYTHING VARIES:

	# Heterogeneity is universal, so we need a way of discriminating between variation that is scientifically interesting
	# and variation that just reflects background heterogeneity.

	worms <- read.csv("C:\\MSc\\Statistics\\worms.csv")
	# opens the file worms.csv

	attach(worms)
	# takes a "database"

	names(worms)
	# prints the name of the columns

	# subscripts in R appear within scare brackets, thus y[7] is the 7th element of the vector "y"
	# z[2,6] it is the second row of the 6th column.
	# [, means all the rows
	# ,] means all the columns

	worms[,1:3]
	# to view the first, second and third column.

	worms[5:15,]
	# to view the middle 11 rows of all the columns of the dataframe

	worms[Area>3&Slope<3,]
	# this selects onyl the rows which have Area > 3 and Slope <3 using ']

	worms[order(Area),]
	# this sorts the variable "Area" in ascending order

	worms[rev(order(worms[,5])),c(5,7)]
	# this sorts the 5th and 7th column into descending order

	# SUMMARISING THE CONTENTS OF A DATAFRAME

	summary(worms)

	with(worms,tapply(Worm.density,Vegetation,mean))
	# to find out the mean number under different variables
	# for a single response variable you can use "tapply" and "with"

	aggregate(worms[,c(2,3,5,7)], list(Vegetation),mean)
	# "aggregate" calculates the mean values of all the continuous variables summarised at the same time
	# "aggregate" assigns the word "Group.1" to the "Vegetation" column as a default
	aggregate(worms[,c(2,3,5,7)], list(Communit=Vegetation),mean)
	# this assigns the word "Community to the column instead

	aggregate(worms[,c(2,3,5,7)], list(Moisture=Damp,Community=Vegetation),mean)
	# you can do multiple classifications using two or more categorical explanatory variables

	with(worms,tapply(Slope,list(Damp,Vegetation),mean))
	# tapply operator produces NA for missing combinations

	# GET TO KNOW YOUR DATA

	head(data)
	# prints the first 6 lines of the column

	plot(y)
	# produces a scatterplot with the values of "y"

	which(y>10)
	# prints the number of the line that has a value of >10

	y[50]<-2.179386
	# assigns the value 2.179386 to y[50]

	table(treatment)
	# this checks the factor levels of the variable "treatment"

	# in this case we found a mistake in one of the entries
	which(treatment=="nitogen")
	# this finds the mistake: in this case "nitogen"
	treatment[11]<-"nitrogen"
	# this fixes the mistake, attributing the word "nitrogen" to the entry.
	# Note that words need to be in brackets ""

	# to save the file
	write(x, file = "data",
	ncolumns = if(is.character(x)) 1 else 5,
	append = FALSE, sep = " ")
	# it should work just by typing:
	write.csv(name_of_the_file, file = "new_name.csv")

	# when saving files it is worth setting a directory to save the files

	getwd()
	# this prints the current working directory

	setwd("C:\\MSc\\Statistics\\Satistics")
	# this sets the current directory
	# note that "C:\\" must be in capitols

	detach(data)
	# this stops R from using the file "data" again

	rm(data)
	# this operator removes "data" from R

	# RELATIONSHIPS

	plot(x,y,pch=21,bg="red")
	# the response variable is "y" and the explanatory variable is "x"
	# this plots a graph with red spots as the values in the entries
	# pch stands for plotting character, pch=21 gives a black edge and bright colour fill
	# bg is background = red

	plot(factor(month),upper)
	# this produces a box-and-whisker plot which is useful for error-checking
	# R produces this when the operator 'plot' is followed by a categorical explanatory variable ('factor(month)')

	# LOOKING FOR INTERACTIONS BETWEEN CONTINUOUS VARIABLES

	# we now have a table with three columns: x, y and z

	par(mfrow=c(1,2))
	# this alters the graphics parameter to specify two sets of axis on the same row

	plot(x,y)
	plot(z,y)
	# now these two graphs are side by side

	windows(7,7)
	# windows operator changes the shape of the plotting window
	# this command is not needed for RStudio

	coplot(y~x\|z,pch=16,panel=panel.smooth)
	# coplot plots y against x conditional of (or given) the value of z
	# "\|" is read "given"
	# this splits the data into six graphs
	# pch=16 is for solid black plotting symbols
	# panel=panel.smooth is to fit a trend line through the scatterplot in red
	# it also produces shaded horizontal bars (shingles) which overlap as they share data points

	# INTERACTIONS INVOLVING CATEGORICAL VARIABLES

	par(mfrow=c(1,2))
	# alters the graphics parameter to specify two sets of axis on the same row

	plot(nitrogen,yield,main="Nitrog")
	plot(phosphorus,yield,main="Phosph")
	# plots show the 'main effects' of nitrogen and phosphorous in separate graphs

	tapply(yield,list(nitrogen,phosphorus),mean)
	# 'tapply' summarises variables
	# useful for breaking up a vector into groups defined by some classifying factor, compute a factor on the subsets and returning the result in a convenient form

	barplot(tapply(yield,list(nitrogen,phosphorus),mean),beside = TRUE,xlab = "phosphorous")
	# barplot is used to output the interactions calculated by tapply graphically
	# xlab is similar to 'main', it adds labels to the plot

	legend(locator(1),legend = c("no","yes"),title="nitrogen",fill=c("black","lightgrey"))
	# 'locator' allows to put the legend in a place where it does no interfere with any of the bars
	# click to add the legend

	# APPENDIX, ESSENTIALS OF THE R LANGUAGE

	# R AS A CALCULATOR

	log(42/7.3)

	log(16,2)

	5+6+3+6+4+2+4+8+3+2+7

	2+3; 5*7; 3-7

	8^(1/3)

	abs(5.7-6.8)/0.38

	options(digits=3)
	# controls the number of digits printed

	# BUILT-IN FUNCTIONS

	exp(1)
	# antilog function of the base e

	pi

	sin(pi/2)

	cos(pi/2)

	# MODULO AND INTERGER QUOTIENTS

	119%/%13
	# integer

	119%%13
	# modulo: whats left over
	# modulo can be used to test whether numbers are odd or even

	15421%%7==0
	# modulo can also be used to find out whether one number is an exact multiple of another

	# ASSIGNMENT

	x<-5

	# ROUNDING

	floor(5.7)
	ceiling(5.7)

	rounded<-function(x)floor(x+0.5)
	rounded(5.7)

	# INFINITY AND THINGS THAT ARE NOT A NUMBER

	3/0
	# Inf

	-12/0
	# -Inf

	exp(-Inf)
	# 0

	0/Inf
	# 0

	(0:3)^Inf
	# 0:3 generates a series (0,1,2,3)
	# It produces a vector of answers 0 1 Inf Inf

	0/0
	# NaN (Not a Number)

	is.finite(10)
	# TRUE

	is.infinite(10)
	# FALSE

	# MISSING VALUES (NA)

	x<-c(1:8,NA)
	mean(x)
	# this will come up NA
	# the NA needs to be removed

	mean(x,na.rm=T)
	# this removes NA

	(vmv<-C(1:6,NA,NA,9:12))
	# attribute values to 'vmv'

	seq(along=vmv)[is.na(vmv)]
	# 'is.na(x)' locates the missing values whithin a vector

	which(is.na(vmv)
	# same result as before

	(vmv[is.na(vmv)]<-0)
	# this attributes '0' to the NA values

	vmv<-c(1:6,NA,NA,9:12)
	ifelse(is.na(vmv),0,vmv)
	# 'ifelse' does the same

	# CREATING A VECTOR

	y<-10:16
	y<-(10,11,12,13,14,15,16)
	y<-scan()
	# 'scan' manual input through keyboard

	(counts<-c(25,12,7,4,6,2,1,0,2))
	names(counts)<-0:8

	st<-table(rpois(2000,2.3))
	as.vector(st)
	# this removes the names

	data<-read.csv("c=\\MSc\\Stats\\daphnia.csv")
	attach(data)
	names(data)
	tapply(Growth.rate,Detergent,mean)
	tapply(Growth.rate,list(Water,Daphnia),median)

	x<-0:10
	sum(x)
	sum(x<5)
	x<5
	1*(x<5)
	x*(x<5)
	sum(x*(x<5))

	y<-c(8,3,5,7,6,6,8,9,2,3,9,4,10,4,11)
	sort(y)
	# sorts in ascending sequence

	rev(sort(y))
	# descending

	rev(sort(y))[2]
	rev(sort(y))[1:3]
	sum(rev(sort(y))[1:3])

	y
	which(y>5)
	# shows the addresses of the elements with values higher than 5
	y[y>5]

	length(y)
	length(y[y>5])
	# the number of elements in total

	x<-c(5,8,6,7,1,5,3)
	z<-x[-1]
	z

	trim.mean<-function(x)mean(sort(x)[-c(1,length(x))])
	# it first sorts the vector
	# removes the first element using x[-1] and last using x[-length(x)]
	# -c(1,length(x)) concatenates both instructions

	trim.mean(x)

	# REPEATS

	rep(9,5)
	# 99999
	rep(1:4,2)
	# 12341234
	rep(1:4,each=2)
	#11223344

	rep(1:4,each=2,times=3)
	#112233441122334411223344

	rep(c(9,15,21,83),c(4,1,4,2))
	# 999915212121218383

	# GENERATE FACTOR LEVELS

	gl(4,3)
	# 'gl' (generate levels
	# 'gl' (up to, with repeats of, to total length)

	gl(4,3,24)
	gl(4,3,20)
	gl(3,2,24,labels=c("A","B","C"))
	# AABBCCAABBCCAABBCCAABBCC
	# Levels: ABC

	seq(0,1.5,0.2)
	# we use 'seq' when the interval is not 1.0
	# here we want to go from 0 to 1.5 in steps of 0.2

	x<-rnorm(18,10,2)
	seq(88,50,along=x)
	# 'along' can create a sequence of the same length as an existing vector 'x'

	# MATRICES

	X<-matrix(c(1,0,0,0,1,0,0,0,1),nrow=3)
	# [,1] [,2] [,3]
	# [1,] 1 0 0
	# [2,] 0 1 0
	# [3,] 0 0 1

	class(X)
	attributes(X)

	vector<-c(1,2,3,4,4,3,2,1)
	V<-matrix(vector,byrow=T,nrow=2)
	# this allows to convert a vector into a matrix

	V
	# [,1] [,2] [,3] [,4]
	# [1,] 1 2 3 4
	# [2,] 4 3 2 1

	dim(vector)<-c(4,2)
	is.matrix(vector)
	# this is another way to convert a vector into a matrix

	vector
	# [,1] [,2]
	# [1,] 1 4
	# [2,] 2 3
	# [3,] 3 2
	# [4,] 4 1

	(vector<-t(vector))
	# transpose matrix

	# CHARACTER STRINGS

	phrase<- "the quick brown fox jumps over the lazy cow"
	q<-character(20)
	for(i in 1:20)q[i]<-substr(phrase,1,i)
	q
	# [1] "t" "th"
	# [3] "the" "the "
	# [5] "the q" "the qu"
	# [7] "the qui" "the quic"
	# [9] "the quick" "the quick "
	# [11] "the quick b" "the quick br"
	# [13] "the quick bro" "the quick brow"
	# [15] "the quick brown" "the quick brown "
	# [17] "the quick brown f" "the quick brown fo"
	# [19] "the quick brown fox" "the quick brown fox "

	strsplit(phrase,split=character(0))
	# [[1]]
	# [1] "t" "h" "e" " " "q" "u" "i" "c" "k" " " "b" "r" "o"
	# [14] "w" "n" " " "f" "o" "x" " " "j" "u" "m" "p" "s" " "
	# [27] "o" "v" "e" "r" " " "t" "h" "e" " " "l" "a" "z" "y"
	# [40] " " "c" "o" "w"

	table(strsplit(phrase,split=character(0)))
	# a b c e f h i j k l m n o p q r s t u v w x y z
	# 8 1 1 2 3 1 2 1 1 1 1 1 1 4 1 1 2 1 2 2 1 2 1 1 1

	words<-1+table(strsplit(phrase,split=character(0)))[1]
	toupper(phrase)
	tolower(toupper(phrase))
	# [1] "THE QUICK BROWN FOX JUMPS OVER THE LAZY COW"
	# toupper makes the characters upper case
	# tolower makes them lower case

	# ARITHMETIC MEAN OF A SINGLE SAMPLE

	arithmetic.mean<-function(x) sum(x)/length(x)

	y<-c(3,3,4,5,5)
	arithmetic.mean(y)
	mean(y)
	sort(y)[ceiling(length(y)/2)]

	med<-function(x){
	odd.even<-length(x)%%2
	if(odd.evem==0) (sort(x)[length(x)/2]+sort(x)[1+length(x)/2])/2
	else sort(x)[ceiling(length(x)/2)]
	}

	med(y)
	med(y[-1])

	# LOOPS AND REPEATS

	for(i in 1:5)print(i^2)

	binary<-function(x)
	if(x==0)return(0)
	i<-0
	string<-numeric(32)
	while(x>0) {
	string[32-i]<-x%%2
	x<-x%/%2
	i<-i+1 }
	first<-match(1,string)
	string[first:32]

	sapply(15:17,binary)

	# THE IFELSE FUNCTION

	z<-ifelse(y<0, -1,1)

	# EVALUATING FUNCTIONS WITH APPLY

	(X<-matrix(1:24,nrow=4))
	apply(X,1,sum)
	# 'apply' is used for applying functions to the rows or columns of matrices
	# the answer produced by 'apply is a vector rather than a matrix

	apply(X,1,sqrt)

	# TESTING FOR EQUALITY

	x<-sqrt(2)
	x*x==2
	x*x-2

	# TESTING AND COERCING IN R

	as.numeric(factor(c("a","b","c")))
	as.numeric(c("a","b","c"))
	# non-numeric characters cannot be coerced into numbers

	geometric<-function(x){
	if(!is.numeric(x)) stop("Input must be numeric")
	exp(mean(log(x)))}

	geometric(c("a","b","c"))

	geometric<-function(x){
	if(!is.numeric(x)) stop("Input must be numeric")
	if(min(x)<=0) stop ("Input must be greater than zero")
	exp(mean(log(x)))}

	geometric(c(2,3,0,4))
	geometric(c(10,1000,10,1,1))

	# TIME AND DATES

	Sys.time()
	class(Sys.time())
	time.list<-as.POSIXlt(Sys.time())
	class(time.list)
	unlist(time.list)
	tim.list$wday
	time.list$yday

	Rdate<-strptime(date,"%d/%m/%Y")
	class(Rdate)

	tapply(x,Rdate$wday,mean)

	y<-paste(hrs,min,sec,sep=":")

	strptime(y,"%T")

	(Rtime<-as.difftime(y))

	tapply(Rtime,experiment,mean)

	y2<-as.POSIXlt("2018-10-22")
	y1<-as.POSIXlt("2018-10-22")

	y2-y1

	y3<-as.POSIXlt("2018-10-22 09:30:59")
	y4<-as.POSIXlt("2018-10-22 12:45:06")
	y4-y3

	difftime("2018-10-22 12:45:06","2018-10-22 09:30:59")

	y4>y3



	# UNDERSTANDIN THE STRUCTURE OF AN R OBJECT USING 'str'

	x<-runif(23)
	str(x)

	basket<-list(rep("a",4),c("b0","b1","b2"),9:4,gl(5,3))
	basket
	str(basket)

	xv<-seq(0,30)
	yv<-2+0.5*xv+rnorm(31,0,2)
	model<-lm(yv~xv+I(xv^2))
	str(model)