Last active
January 9, 2025 12:18
-
-
Save RHDZMOTA/e77848e592c6d592e8ff to your computer and use it in GitHub Desktop.
Download and clean data from the World Health Organization's Global Health Observatory: consumption of pure alcohol by type of beverage. The dataset contains 2010 data for nearly 50 countries.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Radar Plots
# Example: consumption of pure alcohol by type of beverage
library(tibble)

# Download data -----------------------------------------------------------
# Data by country and type of beverage.
# The query string targets GHO indicator SA_0000001398 and filters on
# REGION:EUR with every COUNTRY and every ALCOHOLTYPE.
url <- "http://apps.who.int/gho/athena/data/data-text.csv?target=GHO/SA_0000001398&profile=text&filter=COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*"

# Download and read files
# The response is written to `file_name` in the current working directory.
file_name <- "consum_type"

# download.file() returns 0 on success and a non-zero code on failure;
# stop here rather than parsing a missing or stale file.
download_status <- download.file(url, file_name)
if (download_status != 0) {
  stop("Download of '", file_name, "' failed with status ", download_status,
       call. = FALSE)
}

# Parse as CSV. `quote` and `comment.char` are set explicitly because the
# read.table() defaults also treat ' as a quote and # as a comment marker,
# either of which would corrupt rows containing those characters.
# NOTE(review): `na.strings = FALSE` is kept from the original; it is not a
# character vector of NA markers, so the literal string "NA" is presumably
# not converted to a missing value -- confirm this is intended.
dataset <- read.table(file = file_name, header = TRUE, sep = ",",
                      quote = "\"", comment.char = "",
                      stringsAsFactors = FALSE, na.strings = FALSE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean data --------------------------------------------------------------
# Expects `dataset` (the raw table read from the downloaded file) to exist.
# Produces:
#   dataset  - cleaned long table plus one "Average Country" row per beverage,
#              ordered by beverage type
#   avrg_df  - only the "Average Country" rows, ordered by beverage type
#   df_aux   - lookup of sorted country names and their positions
#   avr_index - position of "Average Country" in that sorted list
library(tibble)

# Use a tibble to manipulate the data and keep only the columns used below.
dataset <- as_tibble(dataset)
dataset <- dataset[, c("Country", "Beverage.Types", "Numeric")]

# Row-position index, stored as a column to help filter relevant data.
# seq_len() stays empty for a zero-row table (1:0 would yield c(1, 0)).
index <- seq_len(nrow(dataset))
dataset$Index <- index

# Unique countries, problem: some names are too long.
countries_unique <- unique(dataset$Country)
countries_problem <- c("Russian Federation",
                       "United Kingdom of Great Britain and Northern Ireland",
                       "The former Yugoslav republic of Macedonia",
                       "Republic of Moldova")
countries_solution <- c("Russia", "UK", "Macedonia", "Moldova")

# Vectorised recode: for every row find which long name (if any) it matches
# and substitute the short name at the same position. Unlike a per-country
# index matrix, this works when the countries have different numbers of rows
# or when one of them is absent from the data.
recode_pos <- match(dataset$Country, countries_problem)
needs_recode <- !is.na(recode_pos)
dataset$Country[needs_recode] <- countries_solution[recode_pos[needs_recode]]

# Other problem: the "Other alcoholic beverages" label is too long.
# %in% never yields NA, so rows with a missing beverage type are left alone.
is_other <- dataset$Beverage.Types %in% "Other alcoholic beverages"
dataset$Beverage.Types[is_other] <- "Other"

# Add rows to the data frame that contain the average consumption
# (one row per beverage type, missing values ignored).
beverage_unique <- unique(dataset$Beverage.Types)
beverage_average <- vapply(
  beverage_unique,
  function(beverage) {
    mean(dataset$Numeric[dataset$Beverage.Types %in% beverage], na.rm = TRUE)
  },
  numeric(1)
)
names(beverage_average) <- beverage_unique

# All average rows share a single Index value (one past the last data row),
# stored as a character string exactly as before.
# NOTE(review): dataset$Index is integer up to this point, so rbind() below
# presumably coerces the combined column to character -- confirm that is what
# downstream code expects.
n_beverages <- length(beverage_unique)
df_aux <- tibble(
  Country = rep("Average Country", n_beverages),
  Beverage.Types = beverage_unique,
  Numeric = beverage_average,
  Index = rep(as.character(nrow(dataset) + 1), n_beverages)
)
dataset <- rbind(dataset, df_aux)
dataset <- dataset[order(dataset$Beverage.Types), ]
df_aux <- df_aux[order(df_aux$Beverage.Types), ]
avrg_df <- df_aux

# Identify position (index) of the average country among the sorted country
# names, for future use in the plot.
countries_unique <- sort(unique(dataset$Country), na.last = TRUE)
countries_unique_index <- seq_along(countries_unique)
df_aux <- tibble(countries = countries_unique,
                 index = countries_unique_index)
avr_country <- "Average Country"
avr_index <- df_aux$index[df_aux$countries %in% avr_country]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment