jacksonpradolima · November 29, 2016 17:07
diff --git a/evaluate.data.distribution.R b/evaluate.data.distribution.R
 #' @title Evaluate the data distribution
 #'
 #' @description
 #' \code{evaluate.data.distribution} pools every value of a data frame, runs
 #' the Shapiro-Wilk W test on them and draws descriptive, density and Q-Q
 #' plots with the DescTools and scmamp packages. It is a wrapper meant to be
 #' used when multiple comparison tests are going to be applied.
 #'
 #' @details
 #' The packages ggplot2, DescTools and scmamp are attached on demand. A
 #' package that is not installed is installed with \code{install.packages}
 #' first.
 #'
 #' Consider this data in a CSV file (the data contains the means):
 #'
 #' \preformatted{
 #' Instance;NSGAII_candidate_1;NSGAII_candidate_2;NSGAII_candidate_3;NSGAII_candidate_4;NSGAII_candidate_5
 #' Bisect;0.2974943182;0.4428475379;0.3952869318;0.4165956439;0.373250947
 #' Bub;0.4726869838;0.4723876946;0.4904889349;0.4858916803;0.4519410631
 #' Find;0.5496787836;0.5634362087;0.5319182593;0.5007053848;0.5024744964
 #' Fourballs;0.0;0.0;0.0;0.0;0.0
 #' Mid;0.0;0.0;0.0;0.0;0.0
 #' Triangulo;0.4719169351;0.4573268921;0.4447312802;0.4377935454;0.4120169082
 #' }
 #'
 #' First load it with the \code{prepare.data.statistical.test} function
 #' (another function, available in
 #' https://gist.github.com/jacksonpradolima/2eeac87390268eebb4e57862f076be8a)
 #' and pass the result to this function, as shown in the examples.
 #'
 #' @param data A data frame with the values to evaluate. All of its columns
 #'   are converted with \code{data.matrix} and pooled into a single vector for
 #'   the Shapiro-Wilk test. Any input that is not a data frame is rejected
 #'   with a warning.
 #' @param isEnglish Logical. If \code{TRUE} (default) the plots keep their
 #'   default (English) labels, otherwise Portuguese labels are used.
 #' @return A list with the evaluation results, with the elements
 #'   \code{Shapiro.Test}, \code{Densities}, \code{Density.Algorithms},
 #'   \code{Data.Density} and \code{QQ.Plot}. When \code{data} is not a data
 #'   frame, \code{NULL} is returned invisibly.
 #'
 #' @examples
 #' \dontrun{
 #' my_data <- prepare.data.statistical.test("my_data.csv")
 #'
 #' # Distribution with English labels
 #' evaluate.data.distribution(my_data)
 #'
 #' # Distribution with Portuguese labels
 #' evaluate.data.distribution(my_data, isEnglish = FALSE)
 #' }
 evaluate.data.distribution <- function(data, isEnglish = TRUE) {
   # Validate the input before loading (or installing) anything, and tell the
   # caller why nothing was evaluated instead of silently returning NULL.
   if (!is.data.frame(data)) {
     warning("'data' must be a data frame; nothing was evaluated.",
             call. = FALSE)
     return(invisible(NULL))
   }

   # Load the needed packages, installing the ones that are missing.
   # They are attached because the plotting helpers below are called
   # unqualified.
   for (pkg in c("ggplot2", "DescTools", "scmamp")) {
     if (!requireNamespace(pkg, quietly = TRUE)) {
       print(paste0("You are missing the package '", pkg,
                    "', we will now try to install it..."))
       install.packages(pkg)
     }
     library(pkg, character.only = TRUE)
   }

   # Pool every cell of the data frame into one numeric vector.
   values <- as.vector(data.matrix(data))

   # Check the normality: a p-value > 0.05 is reported as normal.
   shapiro.result <- shapiro.test(values)
   if (shapiro.result$p.value > 0.05) {
     print("The distribution is normal.")
   } else {
     print("The distribution is not-normal.")
   }

   # Descriptive summary and plot of the pooled values (DescTools).
   densities <- Desc(values, main = "", plotit = TRUE)

   # scmamp Gaussian Q-Q plot; returned under the name Density.Algorithms.
   density.algorithms <- qqplotGaussian(data) + theme_bw()

   # Density plot of the data (scmamp).
   density.all <- plotDensities(data = data, size = 1)

   # Only the labels differ between the two languages.
   if (isEnglish) {
     qq.plot <- qqnorm(values)
   } else {
     density.all <- density.all +
       xlab("Valor") +
       ylab("Densidade") +
       guides(color = guide_legend("Algoritmos"))

     # NOTE(review): the Portuguese plural of "quantil" is "quantis"; the
     # labels are kept as they were - confirm before changing them.
     qq.plot <- qqnorm(values,
                       main = "Quantil-Quantil da Normal",
                       xlab = "Quantiles Teóricos",
                       ylab = "Quantiles de Amostra")
   }

   # Reference line on the base-graphics Q-Q plot drawn above.
   qqline(values, col = 2)

   list(Shapiro.Test = shapiro.result,
        Densities = densities,
        Density.Algorithms = density.algorithms,
        Data.Density = density.all,
        QQ.Plot = qq.plot)
 }
	#' @title Evaluate the data distribution
	#'
	#' @description
	#' \code{evaluate.data.distribution} pools every value of a data frame, runs
	#' the Shapiro-Wilk W test on them and draws descriptive, density and Q-Q
	#' plots with the DescTools and scmamp packages. It is a wrapper meant to be
	#' used when multiple comparison tests are going to be applied.
	#'
	#' @details
	#' The packages ggplot2, DescTools and scmamp are attached on demand. A
	#' package that is not installed is installed with \code{install.packages}
	#' first.
	#'
	#' Consider this data in a CSV file (the data contains the means):
	#'
	#' \preformatted{
	#' Instance;NSGAII_candidate_1;NSGAII_candidate_2;NSGAII_candidate_3;NSGAII_candidate_4;NSGAII_candidate_5
	#' Bisect;0.2974943182;0.4428475379;0.3952869318;0.4165956439;0.373250947
	#' Bub;0.4726869838;0.4723876946;0.4904889349;0.4858916803;0.4519410631
	#' Find;0.5496787836;0.5634362087;0.5319182593;0.5007053848;0.5024744964
	#' Fourballs;0.0;0.0;0.0;0.0;0.0
	#' Mid;0.0;0.0;0.0;0.0;0.0
	#' Triangulo;0.4719169351;0.4573268921;0.4447312802;0.4377935454;0.4120169082
	#' }
	#'
	#' First load it with the \code{prepare.data.statistical.test} function
	#' (another function, available in
	#' https://gist.github.com/jacksonpradolima/2eeac87390268eebb4e57862f076be8a)
	#' and pass the result to this function, as shown in the examples.
	#'
	#' @param data A data frame with the values to evaluate. All of its columns
	#'   are converted with \code{data.matrix} and pooled into a single vector for
	#'   the Shapiro-Wilk test. Any input that is not a data frame is rejected
	#'   with a warning.
	#' @param isEnglish Logical. If \code{TRUE} (default) the plots keep their
	#'   default (English) labels, otherwise Portuguese labels are used.
	#' @return A list with the evaluation results, with the elements
	#'   \code{Shapiro.Test}, \code{Densities}, \code{Density.Algorithms},
	#'   \code{Data.Density} and \code{QQ.Plot}. When \code{data} is not a data
	#'   frame, \code{NULL} is returned invisibly.
	#'
	#' @examples
	#' \dontrun{
	#' my_data <- prepare.data.statistical.test("my_data.csv")
	#'
	#' # Distribution with English labels
	#' evaluate.data.distribution(my_data)
	#'
	#' # Distribution with Portuguese labels
	#' evaluate.data.distribution(my_data, isEnglish = FALSE)
	#' }
	evaluate.data.distribution <- function(data, isEnglish = TRUE) {
	  # Validate the input before loading (or installing) anything, and tell the
	  # caller why nothing was evaluated instead of silently returning NULL.
	  if (!is.data.frame(data)) {
	    warning("'data' must be a data frame; nothing was evaluated.",
	            call. = FALSE)
	    return(invisible(NULL))
	  }

	  # Load the needed packages, installing the ones that are missing.
	  # They are attached because the plotting helpers below are called
	  # unqualified.
	  for (pkg in c("ggplot2", "DescTools", "scmamp")) {
	    if (!requireNamespace(pkg, quietly = TRUE)) {
	      print(paste0("You are missing the package '", pkg,
	                   "', we will now try to install it..."))
	      install.packages(pkg)
	    }
	    library(pkg, character.only = TRUE)
	  }

	  # Pool every cell of the data frame into one numeric vector.
	  values <- as.vector(data.matrix(data))

	  # Check the normality: a p-value > 0.05 is reported as normal.
	  shapiro.result <- shapiro.test(values)
	  if (shapiro.result$p.value > 0.05) {
	    print("The distribution is normal.")
	  } else {
	    print("The distribution is not-normal.")
	  }

	  # Descriptive summary and plot of the pooled values (DescTools).
	  densities <- Desc(values, main = "", plotit = TRUE)

	  # scmamp Gaussian Q-Q plot; returned under the name Density.Algorithms.
	  density.algorithms <- qqplotGaussian(data) + theme_bw()

	  # Density plot of the data (scmamp).
	  density.all <- plotDensities(data = data, size = 1)

	  # Only the labels differ between the two languages.
	  if (isEnglish) {
	    qq.plot <- qqnorm(values)
	  } else {
	    density.all <- density.all +
	      xlab("Valor") +
	      ylab("Densidade") +
	      guides(color = guide_legend("Algoritmos"))

	    # NOTE(review): the Portuguese plural of "quantil" is "quantis"; the
	    # labels are kept as they were - confirm before changing them.
	    qq.plot <- qqnorm(values,
	                      main = "Quantil-Quantil da Normal",
	                      xlab = "Quantiles Teóricos",
	                      ylab = "Quantiles de Amostra")
	  }

	  # Reference line on the base-graphics Q-Q plot drawn above.
	  qqline(values, col = 2)

	  list(Shapiro.Test = shapiro.result,
	       Densities = densities,
	       Density.Algorithms = density.algorithms,
	       Data.Density = density.all,
	       QQ.Plot = qq.plot)
	}