joaovissoci · July 6, 2016 22:21
diff --git a/r_basics_template.R b/r_basics_template.R
 ######################################################################
 #BASIC R STATISTICS TEMPLATE
 ######################################################################
 #
 #
 #
 #
 #
 ######################################################################
 #SETTING ENVIRONMENT
 ######################################################################
 #PACKAGES INSTALLATION CODES
 #install.packages("Hmisc")
 #install.packages("car")
 #install.packages("psych")
 #install.packages("nortest")
 #install.packages("ggplot2")
 #install.packages("pastecs")
 #install.packages("repmis")
 #install.packages("mvnormtest")
 #install.packages("polycor")

 #PACKAGES LOADING CODE
 #Load packages needed for the analysis
 #library(Hmisc)

 #All packages must be installed beforehand with install.packages()
 #invisible() keeps the list returned by lapply() from being printed to the
 #console; TRUE is spelled out because T is an ordinary variable that can
 #be reassigned.
 invisible(lapply(
   c("Hmisc", "car", "psych", "nortest", "ggplot2", "pastecs", "repmis",
     "mvnormtest", "polycor"),
   library, character.only = TRUE
 ))

 ######################################################################
 #IMPORTING DATA
 ######################################################################
 #LOADING DATA FROM A .CSV FILE
 #The string given as `file` is the path to the .csv file on your computer;
 #it is left empty in this template and has to be filled in.
 data <- read.csv(file = "", sep = ",")

 ######################################################################
 #DATA MANAGEMENT
 ######################################################################
 #Creating a data frame (group of variables)
 #numeric<-with(data, data.frame(Peso,Altura,IMC,
 #                          Idade))
 #
 ##Change variables properties
 ##Change variable to factor
 #data$Classificacao<-as.factor(data$Classificacao)
 #
 ##Change variable to character
 #data$Classificacao<-as.character(data$Classificacao)
 #
 ##Change variable to numeric
 #data$Classificacao<-as.numeric(data$Classificacao)
 #
 ##Recoding variables
 #data$Classificacao<-car::recode(data$Classificacao,"1='baixo';2='medio';
 #	3='alto'")

 #Merge two data frames on their shared "nome" column.
 #NOTE(review): data1 and data2 are not created anywhere in the visible
 #script, and the result replaces the `data` object read from the .csv
 #file -- confirm both inputs exist before running this line.
 data <- base::merge(data1,data2,by=c("nome"))


 ######################################################################
 #BASIC DESCRIPTIVES and EXPLORATORY ANALYSIS
 ######################################################################
 ###Section with several exploratory data analysis functions
 #Exploratory Data Analysis
 #dim(data)
 #str (data)
 #head(data)
 #names(data)
 #summary(data) #This command will provide a whole set of descriptive results for each variable
 #Descriptive statistics for the whole data set. psych is attached after
 #Hmisc, so a bare describe() already resolved to psych::describe(); the
 #prefix only makes that explicit.
 psych::describe(data)
 #The same descriptives, and base summaries, within each level of `outcome`
 by(data, data$outcome, psych::describe)
 by(data, data$outcome, summary)
 #stat.desc(data)
 #Anderson-Darling test for normality within each level of `outcome`.
 #ad.test() takes a single numeric vector, so it is applied column by
 #column to the numeric variables rather than to a whole data frame.
 #`outcome` itself is left out because it is constant within each group.
 ad_columns <- setdiff(
   names(data)[vapply(data, is.numeric, logical(1))],
   "outcome"
 )
 by(
   data[ad_columns], data$outcome,
   function(group_rows) lapply(group_rows, nortest::ad.test)
 )
 #skewness(data$Idade) #Will provide skewness analysis
 #kurtosis(data$Idade) - 3 #Will provide kurtosis analysis
 #qplot(data$Idade) # histogram plot
 #boxplot(data$Idade~data$Classificacao) #will provide a boxplot of the variables to analyse potential outliers
 ## Bartlett Test of Homogeneity of Variances
 #bartlett.test(data$Idade~data$Classificacao, data=data)
 ## Fligner-Killeen Test of Homogeneity of Variances
 #fligner.test(data$Idade~data$Classificacao, data=data)
 #leveneTest(data$Idade~data$Classificacao, data=data)

 ######################################################################
 #TABLE 1
 ######################################################################
 # 2-Way Frequency Table
 mytable <- with(data, table(Sexo, Classificacao)) # Sexo in rows, Classificacao in columns
 mytable # print table

 margin.table(mytable, 1) # Sexo frequencies (summed over Classificacao)
 margin.table(mytable, 2) # Classificacao frequencies (summed over Sexo)

 prop.table(mytable) # cell proportions
 prop.table(mytable, 1) # row proportions
 prop.table(mytable, 2) # column proportions

 # Association measures and Fisher's exact test
 # assocstats() is exported by the vcd package, which this script does not
 # attach, so it is called through its namespace (needs install.packages("vcd")).
 vcd::assocstats(mytable)
 fisher.test(mytable)

 # 3-Way Frequency Table
 # Overwrites mytable; ftable() flattens the three dimensions for printing
 mytable <- with(data, table(Sexo, Classificacao, Faixa_etaria))
 ftable(mytable)

 #OUTCOME ASSOCIATION AND BIVARIATE ANALYSIS
 ###########################################
 #PARAMETRIC
 # one sample t-test
 t.test(data$IMC, mu = 25) # H0: mu = 25

 # independent 2-group t-test (IMC is numeric, Sexo is a binary factor)
 # `paired` is not passed: it defaults to FALSE, and the formula method
 # rejects the argument in newer R releases (4.4.0 onwards, per R NEWS).
 t.test(IMC ~ Sexo, data = data)

 # paired t-test
 # IMC2 is an illustrative second measurement built from IMC
 IMC2 <- data$IMC * 2
 t.test(data$IMC, IMC2, paired = TRUE) # both arguments are numeric vectors of equal length

 #NONPARAMETRIC
 # Wilcoxon rank-sum test (IMC is numeric, Sexo is a binary factor)
 wilcox.test(IMC ~ Sexo, data = data)

 # Wilcoxon signed-rank test for paired samples
 wilcox.test(data$IMC, IMC2, paired = TRUE)

 ######################################################################
 #MULTIVARIATE ANALYSIS
 ######################################################################

 # ANALYSIS OF VARIANCE
 ##################################
 # One-way ANOVA (completely randomized design): Idade by Classificacao
 fit <- aov(formula = Idade ~ Classificacao, data = data)
 summary(fit)

 # Randomized block design: the second term, Sexo, takes the role of the
 # blocking factor
 fit <- aov(formula = Idade ~ Classificacao + Sexo, data = data)
 summary(fit)

 # Two-way factorial design: main effects plus their interaction
 fit <- aov(formula = Idade ~ Classificacao * Sexo, data = data)
 summary(fit)

 # Tukey Honestly Significant Differences
 # `fit` at this point is the two-way factorial aov() model fitted just above
 TukeyHSD(fit)

 # Analysis of covariance: IMC enters as the covariate
 fit <- aov(formula = Idade ~ Classificacao + IMC, data = data)
 summary(fit)

 # Kruskal-Wallis test (one-way ANOVA by ranks):
 # Idade is numeric, Classificacao is the grouping factor
 kruskal.test(formula = Idade ~ Classificacao, data = data)


 #CORRELATIONS
 ##############################
 #The correlation calls need a data frame holding only numeric variables.
 #Its definition is commented out in the data management section, so the
 #bare name `numeric` resolves to the base function; the data frame is
 #rebuilt here under a name that does not shadow base::numeric().
 numeric_vars <- with(data, data.frame(Peso, Altura, IMC, Idade))

 #Pearson
 cor(numeric_vars, use = "complete.obs", method = "pearson")
 #Spearman
 cor(numeric_vars, use = "complete.obs", method = "spearman")
 #Kendall
 cor(numeric_vars, use = "complete.obs", method = "kendall")

 #Significance testing
 rcorr(as.matrix(numeric_vars), type = "pearson") # type can be pearson or spearman

 cor.test(numeric_vars$Peso, numeric_vars$Altura) #Used for a single test of significance

 # heterogeneous correlations in one matrix
 # pearson (numeric-numeric),
 # polyserial (numeric-ordinal),
 # and polychoric (ordinal-ordinal)
 # the argument is a data frame with ordered factors
 # and numeric variables
 hetcor(data)

 # polychoric correlation
 # polychor() takes a contingency table of counts (or two ordered
 # variables), not a whole data frame.
 # NOTE(review): Classificacao and Faixa_etaria are used as an illustrative
 # pair of ordinal variables -- replace with the pair you need.
 polychor(with(data, table(Classificacao, Faixa_etaria)))

 #GLM
 ############################################
 #Binomial-family GLM of ATTEMPT_P on the six listed predictors, fitted on
 #the FUP3 data frame (FUP3 is not created in the visible script)
 baselineXFUP3 <- glm(
   ATTEMPT_P ~ Anxiety_presence + AGE + SEX + MARSTAT +
     ATTEMPT_baseline + Diagnostic,
   family = binomial,
   data = FUP3
 )
 summary(baselineXFUP3)
 #Optional follow-ups for the fitted model (uncomment as needed)
 #anova(baselineXFUP3)
 #exp(coef(baselineXFUP3)) # exponentiated coefficients
 #exp(confint(baselineXFUP3)) # 95% CI for exponentiated coefficients
 #predict(baselineXFUP3, type="response") # predicted values
 #residuals(baselineXFUP3, type="deviance") # residuals
 #logistic.display(baselineXFUP3)

 ######################################################################
 #COMPLEX ANALYSIS AND OTHER FIGURES
 ######################################################################


 ######################################################################
 #END
 ######################################################################
	######################################################################
	#BASIC R STATISTICS TEMPLATE
	######################################################################
	#
	#
	#
	#
	#
	######################################################################
	#SETTING ENVIRONMENT
	######################################################################
	#PACKAGES INSTALLATION CODES
	#install.packages("Hmisc")
	#install.packages("car")
	#install.packages("psych")
	#install.packages("nortest")
	#install.packages("ggplot2")
	#install.packages("pastecs")
	#install.packages("repmis")
	#install.packages("mvnormtest")
	#install.packages("polycor")

	#PACKAGES LOADING CODE
	#Load packages needed for the analysis
	#library(Hmisc)

	#All packages must be installed beforehand with install.packages()
	#invisible() keeps the list returned by lapply() from being printed to the
	#console; TRUE is spelled out because T is an ordinary variable that can
	#be reassigned.
	invisible(lapply(
	  c("Hmisc", "car", "psych", "nortest", "ggplot2", "pastecs", "repmis",
	    "mvnormtest", "polycor"),
	  library, character.only = TRUE
	))

	######################################################################
	#IMPORTING DATA
	######################################################################
	#LOADING DATA FROM A .CSV FILE
	#The string given as `file` is the path to the .csv file on your computer;
	#it is left empty in this template and has to be filled in.
	data <- read.csv(file = "", sep = ",")

	######################################################################
	#DATA MANAGEMENT
	######################################################################
	#Creating a data frame (group of variables)
	#numeric<-with(data, data.frame(Peso,Altura,IMC,
	# Idade))
	#
	##Change variables properties
	##Change variable to factor
	#data$Classificacao<-as.factor(data$Classificacao)
	#
	##Change variable to character
	#data$Classificacao<-as.character(data$Classificacao)
	#
	##Change variable to numeric
	#data$Classificacao<-as.numeric(data$Classificacao)
	#
	##Recoding variables
	#data$Classificacao<-car::recode(data$Classificacao,"1='baixo';2='medio';
	# 3='alto'")

	#Merge two data frames on their shared "nome" column.
	#NOTE(review): data1 and data2 are not created anywhere in the visible
	#script, and the result replaces the `data` object read from the .csv
	#file -- confirm both inputs exist before running this line.
	data <- base::merge(data1,data2,by=c("nome"))


	######################################################################
	#BASIC DESCRIPTIVES and EXPLORATORY ANALYSIS
	######################################################################
	###Section with several exploratory data analysis functions
	#Exploratory Data Analysis
	#dim(data)
	#str (data)
	#head(data)
	#names(data)
	#summary(data) #This command will provide a whole set of descriptive results for each variable
	#Descriptive statistics for the whole data set. psych is attached after
	#Hmisc, so a bare describe() already resolved to psych::describe(); the
	#prefix only makes that explicit.
	psych::describe(data)
	#The same descriptives, and base summaries, within each level of `outcome`
	by(data, data$outcome, psych::describe)
	by(data, data$outcome, summary)
	#stat.desc(data)
	#Anderson-Darling test for normality within each level of `outcome`.
	#ad.test() takes a single numeric vector, so it is applied column by
	#column to the numeric variables rather than to a whole data frame.
	#`outcome` itself is left out because it is constant within each group.
	ad_columns <- setdiff(
	  names(data)[vapply(data, is.numeric, logical(1))],
	  "outcome"
	)
	by(
	  data[ad_columns], data$outcome,
	  function(group_rows) lapply(group_rows, nortest::ad.test)
	)
	#skewness(data$Idade) #Will provide skewness analysis
	#kurtosis(data$Idade) - 3 #Will provide kurtosis analysis
	#qplot(data$Idade) # histogram plot
	#boxplot(data$Idade~data$Classificacao) #will provide a boxplot of the variables to analyse potential outliers
	## Bartlett Test of Homogeneity of Variances
	#bartlett.test(data$Idade~data$Classificacao, data=data)
	## Fligner-Killeen Test of Homogeneity of Variances
	#fligner.test(data$Idade~data$Classificacao, data=data)
	#leveneTest(data$Idade~data$Classificacao, data=data)

	######################################################################
	#TABLE 1
	######################################################################
	# 2-Way Frequency Table
	mytable <- with(data, table(Sexo, Classificacao)) # Sexo in rows, Classificacao in columns
	mytable # print table

	margin.table(mytable, 1) # Sexo frequencies (summed over Classificacao)
	margin.table(mytable, 2) # Classificacao frequencies (summed over Sexo)

	prop.table(mytable) # cell proportions
	prop.table(mytable, 1) # row proportions
	prop.table(mytable, 2) # column proportions

	# Association measures and Fisher's exact test
	# assocstats() is exported by the vcd package, which this script does not
	# attach, so it is called through its namespace (needs install.packages("vcd")).
	vcd::assocstats(mytable)
	fisher.test(mytable)

	# 3-Way Frequency Table
	# Overwrites mytable; ftable() flattens the three dimensions for printing
	mytable <- with(data, table(Sexo, Classificacao, Faixa_etaria))
	ftable(mytable)

	#OUTCOME ASSOCIATION AND BIVARIATE ANALYSIS
	###########################################
	#PARAMETRIC
	# one sample t-test
	t.test(data$IMC, mu = 25) # H0: mu = 25

	# independent 2-group t-test (IMC is numeric, Sexo is a binary factor)
	# `paired` is not passed: it defaults to FALSE, and the formula method
	# rejects the argument in newer R releases (4.4.0 onwards, per R NEWS).
	t.test(IMC ~ Sexo, data = data)

	# paired t-test
	# IMC2 is an illustrative second measurement built from IMC
	IMC2 <- data$IMC * 2
	t.test(data$IMC, IMC2, paired = TRUE) # both arguments are numeric vectors of equal length

	#NONPARAMETRIC
	# Wilcoxon rank-sum test (IMC is numeric, Sexo is a binary factor)
	wilcox.test(IMC ~ Sexo, data = data)

	# Wilcoxon signed-rank test for paired samples
	wilcox.test(data$IMC, IMC2, paired = TRUE)

	######################################################################
	#MULTIVARIATE ANALYSIS
	######################################################################

	# ANALYSIS OF VARIANCE
	##################################
	# One-way ANOVA (completely randomized design): Idade by Classificacao
	fit <- aov(formula = Idade ~ Classificacao, data = data)
	summary(fit)

	# Randomized block design: the second term, Sexo, takes the role of the
	# blocking factor
	fit <- aov(formula = Idade ~ Classificacao + Sexo, data = data)
	summary(fit)

	# Two-way factorial design: main effects plus their interaction
	fit <- aov(formula = Idade ~ Classificacao * Sexo, data = data)
	summary(fit)

	# Tukey Honestly Significant Differences
	# `fit` at this point is the two-way factorial aov() model fitted just above
	TukeyHSD(fit)

	# Analysis of covariance: IMC enters as the covariate
	fit <- aov(formula = Idade ~ Classificacao + IMC, data = data)
	summary(fit)

	# Kruskal-Wallis test (one-way ANOVA by ranks):
	# Idade is numeric, Classificacao is the grouping factor
	kruskal.test(formula = Idade ~ Classificacao, data = data)


	#CORRELATIONS
	##############################
	#The correlation calls need a data frame holding only numeric variables.
	#Its definition is commented out in the data management section, so the
	#bare name `numeric` resolves to the base function; the data frame is
	#rebuilt here under a name that does not shadow base::numeric().
	numeric_vars <- with(data, data.frame(Peso, Altura, IMC, Idade))

	#Pearson
	cor(numeric_vars, use = "complete.obs", method = "pearson")
	#Spearman
	cor(numeric_vars, use = "complete.obs", method = "spearman")
	#Kendall
	cor(numeric_vars, use = "complete.obs", method = "kendall")

	#Significance testing
	rcorr(as.matrix(numeric_vars), type = "pearson") # type can be pearson or spearman

	cor.test(numeric_vars$Peso, numeric_vars$Altura) #Used for a single test of significance

	# heterogeneous correlations in one matrix
	# pearson (numeric-numeric),
	# polyserial (numeric-ordinal),
	# and polychoric (ordinal-ordinal)
	# the argument is a data frame with ordered factors
	# and numeric variables
	hetcor(data)

	# polychoric correlation
	# polychor() takes a contingency table of counts (or two ordered
	# variables), not a whole data frame.
	# NOTE(review): Classificacao and Faixa_etaria are used as an illustrative
	# pair of ordinal variables -- replace with the pair you need.
	polychor(with(data, table(Classificacao, Faixa_etaria)))

	#GLM
	############################################
	#Binomial-family GLM of ATTEMPT_P on the six listed predictors, fitted on
	#the FUP3 data frame (FUP3 is not created in the visible script)
	baselineXFUP3 <- glm(
	  ATTEMPT_P ~ Anxiety_presence + AGE + SEX + MARSTAT +
	    ATTEMPT_baseline + Diagnostic,
	  family = binomial,
	  data = FUP3
	)
	summary(baselineXFUP3)
	#Optional follow-ups for the fitted model (uncomment as needed)
	#anova(baselineXFUP3)
	#exp(coef(baselineXFUP3)) # exponentiated coefficients
	#exp(confint(baselineXFUP3)) # 95% CI for exponentiated coefficients
	#predict(baselineXFUP3, type="response") # predicted values
	#residuals(baselineXFUP3, type="deviance") # residuals
	#logistic.display(baselineXFUP3)

	######################################################################
	#COMPLEX ANALYSIS AND OTHER FIGURES
	######################################################################


	######################################################################
	#END
	######################################################################