RHDZMOTA · January 9, 2025 12:18
diff --git a/alcohol_data_download.R b/alcohol_data_download.R
 # Radar Plots 
 # Example: consumption of pure alcohol by type of beverage
 library(tibble)


 # Download data -----------------------------------------------------------
 # CSV export from the WHO GHO Athena endpoint for target SA_0000001398,
 # requested with the filter COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*
 url <- "http://apps.who.int/gho/athena/data/data-text.csv?target=GHO/SA_0000001398&profile=text&filter=COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*"

 # Save the response to a local file, then parse it as comma-separated
 # text whose first line holds the column names.
 file_name <- "consum_type"
 download.file(url = url, destfile = file_name)
 # NOTE(review): na.strings is documented as a character vector; FALSE is
 # kept unchanged here -- confirm which NA markers were intended.
 dataset <- read.table(
   file_name,
   header = TRUE,
   sep = ",",
   stringsAsFactors = FALSE,
   na.strings = FALSE
 )
diff --git a/data_cleaning.R b/data_cleaning.R
 # Clean data --------------------------------------------------------------
 library(tibble)

 # Work with a tibble and keep only the three columns used further down.
 # as_tibble() replaces the deprecated as_data_frame().
 dataset <- as_tibble(dataset)
 dataset <- dataset[, c("Country", "Beverage.Types", "Numeric")]
 # Index records each row's position at this point so later steps can
 # address rows by it. seq_len() yields an empty vector for a zero-row
 # table, where 1:nrow() would produce c(1, 0).
 index <- seq_len(nrow(dataset))
 dataset$Index <- index

 # Unique countries, problem: some names are too long.
 # countries_problem[i] is replaced by countries_solution[i].
 countries_unique <- unique(dataset$Country)
 countries_problem <- c("Russian Federation",
                        "United Kingdom of Great Britain and Northern Ireland",
                        "The former Yugoslav republic of Macedonia",
                        "Republic of Moldova")
 countries_solution <- c("Russia", "UK", "Macedonia", "Moldova")

 # Row positions of every long name, kept as a list (one element per name).
 # sapply() only simplified to a matrix when every name matched the same
 # non-zero number of rows; a missing or unevenly represented country
 # produced a list and made 1:ncol(...) fail.
 countries_problem_index <- lapply(
   X = countries_problem,
   FUN = function(x) which(dataset$Country == x)
 )
 names(countries_problem_index) <- countries_problem

 # A name with no matching rows has an empty position vector, so its
 # assignment is a no-op.
 for (i in seq_along(countries_problem_index)) {
   dataset$Country[countries_problem_index[[i]]] <- countries_solution[i]
 }

 # The "Other alcoholic beverages" label is too long: shorten it to "Other".
 # The Index values of the matching rows are used as row positions.
 is_other <- dataset$Beverage.Types == "Other alcoholic beverages"
 other_inconvinient <- dataset$Index[is_other]
 dataset[other_inconvinient, "Beverage.Types"] <- "Other"

 # Add rows to the data frame that contain the average consumption:
 # one "Average Country" row per beverage type, holding the mean of
 # Numeric over all rows of that type (NA values are ignored).
 beverage_unique <- unique(dataset$Beverage.Types)
 # vapply() guarantees exactly one numeric mean per beverage type; the
 # nested sapply() calls depended on type-unstable simplification.
 beverage_average <- vapply(
   X = beverage_unique,
   FUN = function(x) {
     mean(dataset$Numeric[which(dataset$Beverage.Types == x)], na.rm = TRUE)
   },
   FUN.VALUE = numeric(1)
 )
 names(beverage_average) <- beverage_unique
 Numeric <- beverage_average
 Beverage.Types <- beverage_unique
 Country <- rep("Average Country", length(beverage_unique))
 # NOTE(review): Index is built as character and every average row gets the
 # same value; presumably rbind() then coerces the whole Index column to
 # character -- confirm this is intended.
 Index <- rep(as.character(nrow(dataset) + 1), length(beverage_unique))
 # tibble() replaces the deprecated data_frame().
 df_aux <- tibble(Country = Country, Beverage.Types = Beverage.Types,
                  Numeric = Numeric, Index = Index)

 # Append the average rows, then order both tables by beverage type.
 dataset <- rbind(dataset, df_aux)
 dataset <- dataset[order(dataset$Beverage.Types), ]
 df_aux  <- df_aux[order(df_aux$Beverage.Types), ]
 avrg_df <- df_aux

 # Identify position (index) of the average country within the sorted
 # vector of unique country names, for future use in plot.
 countries_unique <- sort(unique(dataset$Country), na.last = TRUE)
 # seq_along() stays empty for an empty vector, unlike 1:length().
 countries_unique_index <- seq_along(countries_unique)
 # tibble() replaces the deprecated data_frame().
 df_aux <- tibble(countries = countries_unique,
                  index = countries_unique_index)
 avr_country <- "Average Country"
 avr_index <- df_aux$index[which(df_aux$countries == avr_country)]
	# Radar Plots
	# Example: consumption of pure alcohol by type of beverage
	library(tibble)


	# Download data -----------------------------------------------------------
	# CSV export from the WHO GHO Athena endpoint for target SA_0000001398.
	# The filter uses `*` as the value for COUNTRY and ALCOHOLTYPE; the empty
	# values that stood here were presumably left by stripped asterisks --
	# verify against the WHO Athena API.
	url <- "http://apps.who.int/gho/athena/data/data-text.csv?target=GHO/SA_0000001398&profile=text&filter=COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*"

	# Save the response to a local file, then parse it as comma-separated
	# text whose first line holds the column names.
	file_name <- "consum_type"
	download.file(url = url, destfile = file_name)
	# NOTE(review): na.strings is documented as a character vector; FALSE is
	# kept unchanged here -- confirm which NA markers were intended.
	dataset <- read.table(
	  file_name,
	  header = TRUE,
	  sep = ",",
	  stringsAsFactors = FALSE,
	  na.strings = FALSE
	)
	# Clean data --------------------------------------------------------------
	library(tibble)

	# Work with a tibble and keep only the three columns used further down.
	# as_tibble() replaces the deprecated as_data_frame().
	dataset <- as_tibble(dataset)
	dataset <- dataset[, c("Country", "Beverage.Types", "Numeric")]
	# Index records each row's position at this point so later steps can
	# address rows by it. seq_len() yields an empty vector for a zero-row
	# table, where 1:nrow() would produce c(1, 0).
	index <- seq_len(nrow(dataset))
	dataset$Index <- index

	# Unique countries, problem: some names are too long.
	# countries_problem[i] is replaced by countries_solution[i].
	countries_unique <- unique(dataset$Country)
	countries_problem <- c("Russian Federation",
	                       "United Kingdom of Great Britain and Northern Ireland",
	                       "The former Yugoslav republic of Macedonia",
	                       "Republic of Moldova")
	countries_solution <- c("Russia", "UK", "Macedonia", "Moldova")

	# Row positions of every long name, kept as a list (one element per name).
	# sapply() only simplified to a matrix when every name matched the same
	# non-zero number of rows; a missing or unevenly represented country
	# produced a list and made 1:ncol(...) fail.
	countries_problem_index <- lapply(
	  X = countries_problem,
	  FUN = function(x) which(dataset$Country == x)
	)
	names(countries_problem_index) <- countries_problem

	# A name with no matching rows has an empty position vector, so its
	# assignment is a no-op.
	for (i in seq_along(countries_problem_index)) {
	  dataset$Country[countries_problem_index[[i]]] <- countries_solution[i]
	}

	# The "Other alcoholic beverages" label is too long: shorten it to "Other".
	# The Index values of the matching rows are used as row positions.
	is_other <- dataset$Beverage.Types == "Other alcoholic beverages"
	other_inconvinient <- dataset$Index[is_other]
	dataset[other_inconvinient, "Beverage.Types"] <- "Other"

	# Add rows to the data frame that contain the average consumption:
	# one "Average Country" row per beverage type, holding the mean of
	# Numeric over all rows of that type (NA values are ignored).
	beverage_unique <- unique(dataset$Beverage.Types)
	# vapply() guarantees exactly one numeric mean per beverage type; the
	# nested sapply() calls depended on type-unstable simplification.
	beverage_average <- vapply(
	  X = beverage_unique,
	  FUN = function(x) {
	    mean(dataset$Numeric[which(dataset$Beverage.Types == x)], na.rm = TRUE)
	  },
	  FUN.VALUE = numeric(1)
	)
	names(beverage_average) <- beverage_unique
	Numeric <- beverage_average
	Beverage.Types <- beverage_unique
	Country <- rep("Average Country", length(beverage_unique))
	# NOTE(review): Index is built as character and every average row gets the
	# same value; presumably rbind() then coerces the whole Index column to
	# character -- confirm this is intended.
	Index <- rep(as.character(nrow(dataset) + 1), length(beverage_unique))
	# tibble() replaces the deprecated data_frame().
	df_aux <- tibble(Country = Country, Beverage.Types = Beverage.Types,
	                 Numeric = Numeric, Index = Index)

	# Append the average rows, then order both tables by beverage type.
	dataset <- rbind(dataset, df_aux)
	dataset <- dataset[order(dataset$Beverage.Types), ]
	df_aux <- df_aux[order(df_aux$Beverage.Types), ]
	avrg_df <- df_aux

	# Identify position (index) of the average country within the sorted
	# vector of unique country names, for future use in plot.
	countries_unique <- sort(unique(dataset$Country), na.last = TRUE)
	# seq_along() stays empty for an empty vector, unlike 1:length().
	countries_unique_index <- seq_along(countries_unique)
	# tibble() replaces the deprecated data_frame().
	df_aux <- tibble(countries = countries_unique,
	                 index = countries_unique_index)
	avr_country <- "Average Country"
	avr_index <- df_aux$index[which(df_aux$countries == avr_country)]