Last active
November 29, 2016 17:07
-
-
Save jacksonpradolima/2ae64c2b9a449a289c89edb172d0e017 to your computer and use it in GitHub Desktop.
Evaluate the data distribution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Evaluate the data distribution
#'
#' @description
#' \code{evaluate.data.distribution} evaluates the distribution of all values
#' in a data frame. It runs the Shapiro-Wilk W test and builds descriptive,
#' density and Q-Q plots with the DescTools and scmamp packages. It is a
#' wrapper meant to be used before applying multiple comparison tests.
#'
#' @details
#' The packages ggplot2, DescTools and scmamp are attached by this function.
#' Any of them that is not installed is installed first with
#' \code{install.packages}. The Q-Q plot is drawn on the active graphics
#' device as a side effect, and the verdict of the normality test is printed.
#'
#' @param data A data frame of numeric results. In the example below each
#'   column holds the values of one algorithm and each row one instance.
#' @param isEnglish If \code{TRUE} (default) the plots use the default
#'   (English) labels; if \code{FALSE} the labels are in Portuguese.
#' @param alpha Significance level for the Shapiro-Wilk test. The
#'   distribution is reported as normal when the p-value is greater than
#'   \code{alpha}. Defaults to \code{0.05}.
#' @return A list with the evaluation results, with the elements
#'   \code{Shapiro.Test}, \code{Densities}, \code{Density.Algorithms},
#'   \code{Data.Density} and \code{QQ.Plot}. If \code{data} is not a data
#'   frame, a warning is raised and \code{NULL} is returned invisibly.
#'
#' @examples
#' \dontrun{
#' # Considering this data (the means) in a CSV file:
#' #
#' # Instance;NSGAII_candidate_1;NSGAII_candidate_2;NSGAII_candidate_3;NSGAII_candidate_4;NSGAII_candidate_5
#' # Bisect;0.2974943182;0.4428475379;0.3952869318;0.4165956439;0.373250947
#' # Bub;0.4726869838;0.4723876946;0.4904889349;0.4858916803;0.4519410631
#' # Find;0.5496787836;0.5634362087;0.5319182593;0.5007053848;0.5024744964
#' # Fourballs;0.0;0.0;0.0;0.0;0.0
#' # Mid;0.0;0.0;0.0;0.0;0.0
#' # Triangulo;0.4719169351;0.4573268921;0.4447312802;0.4377935454;0.4120169082
#' #
#' # First prepare the data with the prepare.data.statistical.test function
#' # (available in
#' # https://gist.github.com/jacksonpradolima/2eeac87390268eebb4e57862f076be8a)
#' # and store its result:
#' my_data <- prepare.data.statistical.test("my_data.csv")
#'
#' # See the distribution with English labels ...
#' evaluate.data.distribution(my_data)
#' # ... or with Portuguese labels.
#' evaluate.data.distribution(my_data, isEnglish = FALSE)
#' }
evaluate.data.distribution <- function(data, isEnglish = TRUE, alpha = 0.05) {
  # Validate before touching any package so that an unusable input does not
  # trigger installations. The original silently returned NULL here; keep the
  # NULL result for compatibility but tell the caller why.
  if (!is.data.frame(data)) {
    warning("'data' must be a data frame; nothing was evaluated.",
            call. = FALSE)
    return(invisible(NULL))
  }

  # Attach a package, installing it first when it is not available.
  attach_or_install <- function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      print(paste0("You are missing the package '", pkg,
                   "', we will now try to install it..."))
      utils::install.packages(pkg)
    }
    # library() errors if the installation did not succeed, so a missing
    # dependency is reported here instead of failing later at first use.
    library(pkg, character.only = TRUE)
  }
  for (pkg in c("ggplot2", "DescTools", "scmamp")) {
    attach_or_install(pkg)
  }

  # Flatten the whole data frame into a single numeric vector.
  values <- as.vector(data.matrix(data))

  # Check the normality: a p-value greater than alpha is reported as normal.
  shapiro.result <- shapiro.test(values)
  if (shapiro.result$p.value > alpha) {
    print("The distribution is normal.")
  } else {
    print("The distribution is not-normal.")
  }

  # Description (with plot) of the pooled values.
  densities <- DescTools::Desc(values, main = "", plotit = TRUE)

  # Gaussian Q-Q plot built by scmamp from the data frame.
  density.algorithms <- scmamp::qqplotGaussian(data) + ggplot2::theme_bw()

  # Density plot of the data frame; only the labels depend on the language.
  density.all <- scmamp::plotDensities(data = data, size = 1)

  # Base-graphics Q-Q plot of the pooled values.
  if (isEnglish) {
    qq.plot <- qqnorm(values)
  } else {
    density.all <- density.all +
      ggplot2::xlab("Valor") +
      ggplot2::ylab("Densidade") +
      ggplot2::guides(color = ggplot2::guide_legend("Algoritmos"))
    qq.plot <- qqnorm(values,
                      main = "Quantil-Quantil da Normal",
                      xlab = "Quantiles Teóricos",
                      ylab = "Quantiles de Amostra")
  }
  # Reference line added on top of the Q-Q plot just drawn.
  qqline(values, col = 2)

  list(
    Shapiro.Test = shapiro.result,
    Densities = densities,
    Density.Algorithms = density.algorithms,
    Data.Density = density.all,
    QQ.Plot = qq.plot
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment