Skip to content

Instantly share code, notes, and snippets.

@joaovissoci
Last active July 6, 2016 22:21
Show Gist options
  • Save joaovissoci/10888946 to your computer and use it in GitHub Desktop.
Save joaovissoci/10888946 to your computer and use it in GitHub Desktop.
basic R statistical functions
######################################################################
#BASIC R STATISTICS TEMPLATE
######################################################################
#
#
#
#
#
######################################################################
#SETTING ENVIRONMENT
######################################################################
#PACKAGE INSTALLATION CODE
#install.packages("Hmisc")
#install.packages("car")
#install.packages("psych")
#install.packages("nortest")
#install.packages("ggplot2")
#install.packages("pastecs")
#install.packages("repmis")
#install.packages("mvnormtest")
#install.packages("polycor")
#PACKAGES LOADING CODE
#Load the packages needed for the analysis
#library(Hmisc)
#All packages must first be installed with the install.packages() function
# Attach every package used by this script; each one must already be
# installed with install.packages().
# "vcd" is included because it provides assocstats(), which the frequency
# table section calls. Order matters: psych is attached after Hmisc, so an
# unqualified describe() resolves to psych::describe().
# invisible() suppresses the list of search paths lapply() would print.
invisible(lapply(
  c("Hmisc", "car", "psych", "nortest", "ggplot2", "pastecs", "repmis",
    "mvnormtest", "polycor", "vcd"),
  library,
  character.only = TRUE
))
######################################################################
#IMPORTING DATA
######################################################################
# Load the data set from a comma-separated (.csv) file.
# Put the path to the file on your computer between the quotes.
data <- read.csv(file = "", sep = ",")
######################################################################
#DATA MANAGEMENT
######################################################################
#Creating a data frame (group of variables)
#numeric<-with(data, data.frame(Peso,Altura,IMC,
# Idade))
#
##Change variables properties
##Change variable to factor
#data$Classificacao<-as.factor(data$Classificacao)
#
##Change variable to character
#data$Classificacao<-as.character(data$Classificacao)
#
##Change variable to numeric
#data$Classificacao<-as.numeric(data$Classificacao)
#
##Recoding variables
#data$Classificacao<-car::recode(data$Classificacao,"#1='baixo';2='medio';
# 3='alto'")
# Combine data1 and data2 on their shared "nome" column; with merge()'s
# defaults only rows whose key appears in both inputs are kept.
# NOTE(review): data1 and data2 are not created in this script and must
# already exist in the workspace.
data <- base::merge(x = data1, y = data2, by = "nome")
######################################################################
#BASIC DESCRIPTIVES and EXPLORATORY ANALYSIS
######################################################################
###Section with several exploratory data analysis functions
#Exploratory Data Analysis
#dim(data)
#str (data)
#head(data)
#names(data)
#summary(data) #This command provides a set of descriptive results for each variable
# Descriptive statistics for the whole data set. Both Hmisc and psych export
# describe(); psych is attached later, so the unqualified call already
# resolved to psych::describe(). The prefix only makes that explicit.
psych::describe(data)
# The same descriptives, then base summaries, within each level of `outcome`.
with(data, by(data, outcome, psych::describe))
with(data, by(data, outcome, summary))
#stat.desc(data)
# Anderson-Darling normality test for every numeric variable within each
# level of `outcome`. ad.test() takes a single numeric vector, so it is
# applied column by column instead of to the whole data-frame subset.
# `outcome` itself is excluded because it is constant within a group.
# Note: ad.test() stops with an error for samples of 7 or fewer observations.
with(data, by(
  data[setdiff(names(Filter(is.numeric, data)), "outcome")],
  outcome,
  function(grp) lapply(grp, nortest::ad.test)
))
#skewness(data$Idade) #Will provide skewness analysis
#kurtosis(data$Idade) - 3 #Will provide kurtosis analysis
#qplot(data$Idade) # histogram plot
#boxplot(data$Idade~data$Classificacao) #Will provide a boxplot of the variables to check for potential outliers
## Bartlett Test of Homogeneity of Variances
#bartlett.test(data$Idade~data$Classificacao, data=data)
## Fligner-Killeen Test of Homogeneity of Variances
#fligner.test(data$Idade~data$Classificacao, data=data)
#leveneTest(data$Idade~data$Classificacao, data=data)
######################################################################
#TABLE 1
######################################################################
# Two-way frequency table: Sexo in rows, Classificacao in columns.
mytable <- table(data[c("Sexo", "Classificacao")])
mytable                               # print the table
margin.table(mytable, margin = 1)     # Sexo totals (summed over Classificacao)
margin.table(mytable, margin = 2)     # Classificacao totals (summed over Sexo)
prop.table(mytable)                   # cell proportions of the grand total
prop.table(mytable, margin = 1)       # proportions within each row
prop.table(mytable, margin = 2)       # proportions within each column
# Association statistics and Fisher's exact test.
# assocstats() comes from the vcd package, which must be attached.
assocstats(mytable)
fisher.test(mytable)
# Three-way frequency table (Sexo x Classificacao x Faixa_etaria), printed
# flat with ftable(). Note that this overwrites the two-way `mytable`.
mytable <- table(data[c("Sexo", "Classificacao", "Faixa_etaria")])
ftable(mytable)
#OUTCOME ASSOCIATION AND BIVARIATE ANALYSIS
###########################################
#PARAMETRIC
# One-sample t-test. H0: the mean of IMC equals 25.
t.test(data$IMC, mu = 25)
# Independent two-group t-test (Welch by default): IMC is numeric and Sexo is
# a binary factor. `paired` is deliberately not passed: the formula method is
# for independent groups and current R versions stop with an error if the
# argument is supplied at all.
t.test(IMC ~ Sexo, data = data)
# Paired t-test between two numeric vectors of equal length.
# IMC2 is only a placeholder second measurement for demonstration.
IMC2 <- data$IMC * 2
t.test(data$IMC, IMC2, paired = TRUE)
#NONPARAMETRIC
# Wilcoxon rank-sum (Mann-Whitney) test for two independent groups; as with
# t.test(), `paired` must not be passed to the formula method.
wilcox.test(IMC ~ Sexo, data = data)
# Wilcoxon signed-rank test for paired measurements.
wilcox.test(data$IMC, IMC2, paired = TRUE)
######################################################################
#MULTIVARIATE ANALYSIS
######################################################################
# ANALYSIS OF VARIANCE
##################################
# Each model below overwrites `fit`; summary() prints its ANOVA table.

# One-way ANOVA: Idade explained by Classificacao.
fit <- aov(formula = Idade ~ Classificacao, data = data)
summary(fit)

# Randomized block design: Classificacao plus Sexo as an additive
# (blocking) term.
fit <- aov(formula = Idade ~ Classificacao + Sexo, data = data)
summary(fit)

# Two-way factorial design: both main effects and their interaction.
fit <- aov(formula = Idade ~ Classificacao * Sexo, data = data)
summary(fit)

# Tukey Honestly Significant Differences for the factorial model just fitted.
TukeyHSD(fit)

# Analysis of covariance: Classificacao with IMC as an additional term.
fit <- aov(formula = Idade ~ Classificacao + IMC, data = data)
summary(fit)

# Kruskal-Wallis rank-based alternative to the one-way ANOVA
# (Idade is numeric, Classificacao is the grouping factor).
kruskal.test(formula = Idade ~ Classificacao, data = data)
#CORRELATIONS
##############################
# Numeric variables to correlate. Without this definition `numeric` resolves
# to the base R function numeric() and every call below fails.
# NOTE(review): the column list is taken from the commented-out template in
# the data management section; adjust it to your data.
numeric <- data[c("Peso", "Altura", "IMC", "Idade")]
# Correlation matrices using only rows with no missing values.
#Pearson
cor(numeric, use = "complete.obs", method = "pearson")
#Spearman
cor(numeric, use = "complete.obs", method = "spearman")
#Kendall
cor(numeric, use = "complete.obs", method = "kendall")
#Significance testing
# rcorr() (Hmisc) needs a matrix; type can be "pearson" or "spearman".
rcorr(as.matrix(numeric), type = "pearson")
# Significance test for a single pair of variables.
cor.test(numeric$Peso, numeric$Altura)
# Heterogeneous correlations in one matrix (polycor package):
#   Pearson    (numeric-numeric),
#   polyserial (numeric-ordinal),
#   polychoric (ordinal-ordinal).
# The input is a data frame of ordered factors and numeric variables.
hetcor(data)
# Polychoric correlation.
# polychor() expects either a contingency table of counts or a pair of
# ordered categorical variables, not a whole data frame.
# NOTE(review): Classificacao and Faixa_etaria are assumed to be the ordinal
# variables of interest; replace them as needed.
polychor(data$Classificacao, data$Faixa_etaria)
#GLM
############################################
# Binomial (logistic) regression of ATTEMPT_P on anxiety presence, age, sex,
# marital status, baseline attempt and diagnostic, fitted on the FUP3 data.
# NOTE(review): FUP3 is not created in this script and must already exist.
baselineXFUP3 <- glm(
  formula = ATTEMPT_P ~ Anxiety_presence + AGE + SEX + MARSTAT +
    ATTEMPT_baseline + Diagnostic,
  family = binomial,
  data = FUP3
)
# Coefficient table, deviances and AIC for the fitted model.
summary(baselineXFUP3)
#anova(reglogGEU)
#exp(coef(model1_death)) # exponentiated coefficients
#exp(confint(model1_death)) # 95% CI for exponentiated coefficients
#predict(model1_death, type="response") # predicted values
#residuals(model1_death, type="deviance") # residuals
#logistic.display(baselineXFUP3)
######################################################################
#COMPLEX ANALYSIS AND OTHER FIGURES
######################################################################
######################################################################
#END
######################################################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment