Last active
July 6, 2016 22:21
-
-
Save joaovissoci/10888946 to your computer and use it in GitHub Desktop.
basic R statistical functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ###################################################################### | |
| #BASIC R STATISTICS TEMPLATE | |
| ###################################################################### | |
| # | |
| # | |
| # | |
| # | |
| # | |
| ###################################################################### | |
| #SETTING ENVIRONMENT | |
| ###################################################################### | |
| #PACKAGE INSTALLATION CODE | |
| #install.packages("Hmisc") | |
| #install.packages("car") | |
| #install.packages("psych") | |
| #install.packages("nortest") | |
| #install.packages("ggplot2") | |
| #install.packages("pastecs") | |
| #install.packages("repmis") | |
| #install.packages("mvnormtest") | |
| #install.packages("polycor") | |
| #PACKAGES LOADING CODE | |
| #Load the packages needed for the analysis | |
| #library(Hmisc) | |
| #All packages must first be installed with the install.packages() function | |
| #library() is called once per package name; character.only=TRUE makes it | |
| #treat each element as a string rather than as a literal package symbol. | |
| #TRUE is spelled out because T is an ordinary variable that can be reassigned. | |
| lapply(c("Hmisc","car","psych","nortest","ggplot2","pastecs","repmis", | |
| "mvnormtest","polycor"), | |
| library, character.only=TRUE) | |
| ###################################################################### | |
| #IMPORTING DATA | |
| ###################################################################### | |
| #LOADING DATA FROM A .CSV FILE | |
| #Put the path to the .csv file on your computer between the quotes; | |
| #fields are split on commas. | |
| data <- read.csv(file = "", sep = ",") | |
| ###################################################################### | |
| #DATA MANAGEMENT | |
| ###################################################################### | |
| #Creating a data frame (group of variables) | |
| #numeric<-with(data, data.frame(Peso,Altura,IMC, | |
| # Idade)) | |
| # | |
| ##Change variables properties | |
| ##Change variable to factor | |
| #data$Classificacao<-as.factor(data$Classificacao) | |
| # | |
| ##Change variable to character | |
| #data$Classificacao<-as.character(data$Classificacao) | |
| # | |
| ##Change variable to numeric | |
| #data$Classificacao<-as.numeric(data$Classificacao) | |
| # | |
| ##Recoding variables | |
| #data$Classificacao<-car::recode(data$Classificacao,"1='baixo';2='medio'; | |
| # 3='alto'") | |
| #Merge two data frames on their shared "nome" column | |
| #NOTE(review): data1 and data2 are not created anywhere in this template, and the | |
| #result overwrites the data object read from the .csv file - confirm intent | |
| data <- base::merge(x = data1, y = data2, by = "nome") | |
| ###################################################################### | |
| #BASIC DESCRIPTIVES and EXPLORATORY ANALYSIS | |
| ###################################################################### | |
| ###Section with several exploratory data analysis functions | |
| #Exploratory Data Analysis | |
| #dim(data) | |
| #str (data) | |
| #head(data) | |
| #names(data) | |
| #summary(data)#This command will provide a whole set of descriptive #results for each variable | |
| #Descriptive statistics for every variable in the data set | |
| #psych::describe is named explicitly: Hmisc and psych both export describe(), | |
| #and psych is attached later, so it is the one a bare describe() call resolves to | |
| psych::describe(data) | |
| #Same descriptives, computed separately for each level of outcome | |
| with(data,by(data,outcome,psych::describe)) | |
| with(data,by(data,outcome,summary)) | |
| #stat.desc(data) | |
| #Anderson-Darling test for normality within each level of outcome | |
| #ad.test() takes a single numeric vector, so it is applied to one variable | |
| #(Idade here - replace with the variable of interest), not to the whole data frame | |
| with(data,lapply(split(Idade,outcome),ad.test)) | |
| #skewness(data$Idade) #Will provide skewness analysis | |
| #kurtosis(data$Idade) - 3 #Will provide kurtosis analysis | |
| #qplot(data$Idade) # histogram plot | |
| #boxplot(data$Idade~data$Classificacao) #will provide a boxplot for the #variables to analyze potential outliers | |
| ## Bartlett Test of Homogeneity of Variances | |
| #bartlett.test(data$Idade~data$Classificacao, data=data) | |
| ## Fligner-Killeen Test of Homogeneity of Variances | |
| #fligner.test(data$Idade~data$Classificacao, data=data) | |
| #leveneTest(data$Idade~data$Classificacao, data=data) | |
| ###################################################################### | |
| #TABLE 1 | |
| ###################################################################### | |
| # 2-Way Frequency Table | |
| mytable <- with(data,table(Sexo,Classificacao)) # Sexo will be rows, Classificacao will be columns | |
| mytable # print table | |
| margin.table(mytable, 1) # Sexo frequencies (summed over Classificacao) | |
| margin.table(mytable, 2) # Classificacao frequencies (summed over Sexo) | |
| prop.table(mytable) # cell proportions (relative to the table total) | |
| prop.table(mytable, 1) # row proportions (relative to each row total) | |
| prop.table(mytable, 2) # column proportions (relative to each column total) | |
| # Association tests: assocstats() and Fisher's exact test | |
| # assocstats() belongs to the vcd package, which is not among the packages loaded by this script, | |
| # so it is called through its namespace (vcd must be installed) | |
| vcd::assocstats(mytable) | |
| fisher.test(mytable) | |
| # 3-Way Frequency Table | |
| mytable <- with(data,table(Sexo,Classificacao,Faixa_etaria)) # Sexo = rows, Classificacao = columns, one layer per Faixa_etaria | |
| ftable(mytable) # flatten the 3-way table for printing | |
| #OUTCOME ASSOCIATION AND BIVARIATE ANALYSIS | |
| ########################################### | |
| #PARAMETRIC | |
| # one sample t-test | |
| t.test(data$IMC,mu=25) # Ho: mu=25 | |
| # independent 2-group t-test | |
| # paired is not passed: unpaired is the default, and the formula method rejects a | |
| # paired argument in recent R versions (4.4.0 onward - verify against your R version) | |
| t.test(IMC~Sexo,data=data) # where IMC is numeric and Sexo is a binary factor | |
| # paired t-test | |
| IMC2<-data$IMC*2 # placeholder second measurement; replace with the real paired variable | |
| t.test(data$IMC,IMC2,paired=TRUE) # where both arguments are numeric vectors of paired measurements | |
| #NONPARAMETRIC | |
| # Wilcoxon rank-sum (Mann-Whitney) test for two independent groups | |
| wilcox.test(IMC~Sexo,data=data) # where IMC is numeric and Sexo is a binary factor | |
| # Wilcoxon signed-rank test for paired samples | |
| wilcox.test(data$IMC,IMC2,paired=TRUE) | |
| ###################################################################### | |
| #MULTIVARIATE ANALYSIS | |
| ###################################################################### | |
| # ANALYSIS OF VARIANCE | |
| ################################## | |
| # One-way ANOVA (completely randomized design): Idade explained by Classificacao | |
| fit <- aov(formula = Idade ~ Classificacao, data = data) | |
| summary(fit) | |
| # Randomized block design: Sexo enters additively as the blocking factor | |
| fit <- aov(formula = Idade ~ Classificacao + Sexo, data = data) | |
| summary(fit) | |
| # Two-way factorial design: both main effects plus their interaction | |
| fit <- aov(formula = Idade ~ Classificacao * Sexo, data = data) | |
| summary(fit) | |
| # Tukey Honest Significant Differences for the factorial model fitted just above | |
| TukeyHSD(x = fit) | |
| # Analysis of covariance: Classificacao plus IMC as an additional term | |
| fit <- aov(formula = Idade ~ Classificacao + IMC, data = data) | |
| summary(fit) | |
| # Kruskal-Wallis rank-based one-way test: Idade across the levels of Classificacao | |
| kruskal.test(Idade ~ Classificacao, data = data) | |
| #CORRELATIONS | |
| ############################## | |
| #Data frame holding the numeric variables to correlate. | |
| #It is built here because the only definition of such an object in this script is commented out, | |
| #and a bare numeric resolves to the base R function numeric(), which makes every call below fail. | |
| numeric_vars<-with(data, data.frame(Peso,Altura,IMC,Idade)) | |
| #Pearson | |
| cor(numeric_vars, use="complete.obs", method="pearson") | |
| #Spearman | |
| cor(numeric_vars, use="complete.obs", method="spearman") | |
| #Kendall | |
| cor(numeric_vars, use="complete.obs", method="kendall") | |
| #Significance testing | |
| rcorr(as.matrix(numeric_vars), type="pearson") # type can be pearson or spearman | |
| cor.test(numeric_vars$Peso,numeric_vars$Altura) #Used for a single test of significance | |
| # heterogeneous correlations in one matrix | |
| # pearson (numeric-numeric), | |
| # polyserial (numeric-ordinal), | |
| # and polychoric (ordinal-ordinal) | |
| # the argument is a data frame with ordered factors | |
| # and numeric variables | |
| hetcor(data) | |
| # polychoric correlation | |
| # the argument is a contingency table of counts for two ordinal variables, | |
| # so a 2-way table is built here instead of passing the whole data frame | |
| # (Sexo and Classificacao are the pair tabulated earlier in this script - swap in the ordinal pair of interest) | |
| polychor(with(data,table(Sexo,Classificacao))) | |
| #GLM | |
| ############################################ | |
| #Binomial-family GLM of ATTEMPT_P on the listed predictors, fitted to FUP3 | |
| #NOTE(review): FUP3 and these variables are not created anywhere in this template - confirm the data source | |
| baselineXFUP3 <- glm( | |
|   formula = ATTEMPT_P ~ Anxiety_presence + AGE + SEX + MARSTAT + ATTEMPT_baseline + Diagnostic, | |
|   family = binomial, | |
|   data = FUP3 | |
| ) | |
| summary(baselineXFUP3) | |
| #anova(reglogGEU) | |
| #exp(coef(model1_death)) # exponentiated coefficients | |
| #exp(confint(model1_death)) # 95% CI for exponentiated coefficients | |
| #predict(model1_death, type="response") # predicted values | |
| #residuals(model1_death, type="deviance") # residuals | |
| #logistic.display(baselineXFUP3) | |
| ###################################################################### | |
| #COMPLEX ANALYSIS AND OTHER FIGURES | |
| ###################################################################### | |
| ###################################################################### | |
| #END | |
| ###################################################################### |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment