library(tidyverse)
library(widyr)
library(maps)
#>
#> Attaching package: 'maps'
#> The following object is masked from 'package:purrr':
#>
#> map
# Load the TidyTuesday Eurovision voting records straight from GitHub.
votes_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-17/eurovision-votes.csv"
eurovision_votes <- read_csv(file = votes_url)
#> Rows: 56312 Columns: 8
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): semi_final, edition, jury_or_televoting, from_country, to_country, ...
#> dbl (2): year, points
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Group countries by how they award points: reduce the from_country x
# to_country points data with SVD (nv = 24), then run k-means (k = 4) on the
# resulting dimensions.
#
# The raw data spells one country two ways ("The Netherlands" and
# "Netherlands"), which would otherwise be treated as two separate countries.
# Both vote columns are normalised to a single spelling first. Any cluster
# table rendered before this normalisation was added will differ on re-run.
# NOTE(review): the "Macedonia" name variants may deserve the same treatment;
# confirm against the data before merging them.
set.seed(234) # fixed seed so the clustering is reproducible
eurovision_clusters <-
  eurovision_votes %>%
  mutate(
    across(
      c(from_country, to_country),
      ~ str_replace(.x, "^The Netherlands$", "Netherlands")
    )
  ) %>%
  widely_svd(from_country, to_country, points, nv = 24) %>%
  widely_kmeans(from_country, dimension, value, k = 4)
# Collapse each cluster's member countries into one comma-separated string and
# render the result as a Markdown table.
countries_by_cluster <-
  eurovision_clusters %>%
  group_by(cluster) %>%
  summarise(from_country = toString(from_country))

knitr::kable(countries_by_cluster)
cluster | from_country |
---|---|
1 | Belarus, Belgium, Bosnia & Herzegovina, Bulgaria, Estonia, Greece, Israel, Latvia, Lithuania, Poland, Romania, Russia, San Marino, Slovakia, United Kingdom |
2 | Albania, Armenia, Austria, Azerbaijan, Croatia, Czech Republic, F.Y.R. Macedonia, Finland, France, Georgia, Germany, Luxembourg, Macedonia, Malta, Monaco, Montenegro, North Macedonia, Norway, Portugal, Serbia, Serbia & Montenegro, Slovenia, Spain, Sweden, Switzerland, Yugoslavia |
3 | Andorra, Cyprus, Denmark, Ireland, Italy, Morocco, Netherlands, The Netherlands |
4 | Australia, Hungary, Iceland, Moldova, Turkey, Ukraine |
# Draw the clustered countries on a world map, filled by voting cluster.
#
# `map_data("world")` labels some countries differently from the votes data
# (for example "UK" rather than "United Kingdom"). Joining on the raw names
# silently drops those countries from the map, so translate them to the map's
# labels first.
# NOTE(review): historical entries such as "Yugoslavia" presumably have no
# polygon in the map data and stay unplotted; confirm if they matter.
map_clusters <-
  eurovision_clusters %>%
  mutate(
    region = case_when(
      from_country == "United Kingdom" ~ "UK",
      from_country == "Bosnia & Herzegovina" ~ "Bosnia and Herzegovina",
      TRUE ~ from_country
    )
  )

map_data("world") %>%
  # Keep only polygons for clustered countries and attach each one's cluster.
  inner_join(map_clusters, by = "region") %>%
  ggplot(aes(long, lat, group = group, fill = cluster)) +
  geom_polygon(alpha = 0.8) +
  coord_map() +
  scale_fill_brewer(palette = "Dark2") +
  silgelib::theme_plex() +
  labs(
    title = "Which countries vote similarly for Eurovision?",
    subtitle = "Clusters identified with SVD and K-means"
  )
Created on 2022-05-26 by the reprex package (v2.0.1)
@juliasilge great work. Australia was a surprise.
It seems we have a data issue here. The dataset uses both "The Netherlands" (up to 2021) and "Netherlands" (2022) to refer to the same country. It may be worth fixing that to see whether it changes the analysis; I do not know whether it would actually make an impact.
I just added a stringr command to search and replace the name in the two columns.
The Netherlands now falls into the same cluster as the rest of Western Europe.