Last active
June 10, 2023 21:36
-
-
Save benjaminrobinson/7d749d1d4d05e9998c58393a62a5f275 to your computer and use it in GitHub Desktop.
Taylor Swift Eras Tour Setlist Similarity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(dplyr) | |
| library(rvest) | |
| library(stringr) | |
| library(purrr) | |
| library(tidyr) | |
| library(janitor) | |
| library(RecordLinkage) | |
| library(ggplot2) | |
| library(scales) | |
| library(ggthemes) | |
| library(magick) | |
| library(forcats) | |
## ENTER LOCAL DIRECTORY WITHOUT QUOTE MARKS ##
# Output directory for the downloaded logo and the rendered chart.
# Every later path in this script is built as paste0(dir, <file name>), so the
# value has to end in a path separator. Surrounding whitespace is trimmed and a
# "/" is appended when the separator was left off; an empty answer is kept
# as-is, which makes the files land in the current working directory.
# (The name `dir` masks base::dir(); it is kept because the rest of the script
# refers to it.)
dir <- trimws(readline())
if (nzchar(dir) && !grepl("[/\\\\]$", dir)) {
  dir <- paste0(dir, "/")
}

# Fetch the tour artwork that is stamped onto the chart at the end of the
# script. mode = 'wb' requests a binary transfer.
download.file(
  url = 'https://i.iheart.com/v3/re/new_assets/6372707fb66c34b0905cb19d?ops=gravity(%22north%22),fit(1200,675),quality(65)',
  destfile = paste0(dir, "tour.png"),
  mode = 'wb'
)
#' Overlay a logo on a saved plot image
#'
#' Reads the plot and the logo from disk, scales the logo to
#' `plot_width / logo_scale` pixels wide and composites it into one corner of
#' the plot, leaving a margin of 1% of the plot's width/height.
#'
#' @param plot_path Path of the plot image. When `save = TRUE` the result is
#'   written back to this same path.
#' @param logo_path Path of the logo image.
#' @param logo_position One of `"top left"`, `"top right"`, `"bottom left"`
#'   or `"bottom right"`.
#' @param logo_scale Divisor applied to the plot width to obtain the logo
#'   width; the default of 5 makes the logo one fifth of the plot width.
#' @param save If `TRUE`, the composite is rendered through `image_ggplot()`
#'   and saved with `ggsave()` as an 8 x 8 inch image; otherwise it is drawn
#'   on the active graphics device.
add_logo <- function(plot_path,
                     logo_path,
                     logo_position = 'top left',
                     logo_scale = 5,
                     save = FALSE) {
  valid_positions <- c("top right", "top left", "bottom right", "bottom left")
  if (length(logo_position) != 1L || !logo_position %in% valid_positions) {
    stop(
      "Error Message: Uh oh! Logo Position not recognized\n Try: logo_position = 'top left', 'top right', 'bottom left', or 'bottom right'"
    )
  }

  # Read in the raw images
  base_img <- image_read(plot_path)
  logo_raw <- image_read(logo_path)

  # Dimensions of the plot drive both the logo size and its placement
  plot_info <- image_info(base_img)
  plot_height <- plot_info$height
  plot_width <- plot_info$width

  # Logo width is plot_width / logo_scale
  logo <- image_scale(logo_raw, as.character(plot_width / logo_scale))
  logo_info <- image_info(logo)
  logo_width <- logo_info$width
  logo_height <- logo_info$height

  # Offsets are measured from the top-left corner (0, 0); 1% of each plot
  # dimension is kept as aesthetic padding.
  pad_x <- 0.01 * plot_width
  pad_y <- 0.01 * plot_height
  on_right <- logo_position %in% c("top right", "bottom right")
  on_bottom <- logo_position %in% c("bottom right", "bottom left")
  x_pos <- if (on_right) plot_width - logo_width - pad_x else pad_x
  y_pos <- if (on_bottom) plot_height - logo_height - pad_y else pad_y

  # Compose the actual overlay
  composite <- image_composite(
    base_img,
    logo,
    offset = paste0("+", x_pos, "+", y_pos)
  )

  if (isTRUE(save)) {
    # Pass the plot explicitly rather than relying on image_ggplot() having
    # set ggplot2's last_plot() as a side effect.
    ggsave(
      plot_path,
      plot = image_ggplot(composite),
      height = 8,
      width = 8,
      units = 'in',
      dpi = 'retina'
    )
  } else {
    plot(composite)
  }
}
# Work out how many result pages the tour search has by reading the pager on
# one of the result pages. Every text node inside the `.listPager-lg` element
# is inspected on its own, so multi-digit page numbers stay intact and labels
# that are not plain numbers are ignored instead of turning the maximum into NA.
# NOTE(review): assumes each page number sits in its own element inside the
# pager - confirm against the live markup.
pager_labels <- 'https://www.setlist.fm/search?artist=3bd6bc5c&page=2&query=tour:%28The+Eras+Tour%29' %>%
  read_html() %>%
  html_nodes(
    xpath = "//*[contains(concat(' ', normalize-space(@class), ' '), ' listPager-lg ')]//text()"
  ) %>%
  html_text() %>%
  str_squish()

page_numbers <- as.integer(pager_labels[str_detect(pager_labels, "^[0-9]+$")])

if (length(page_numbers) == 0L) {
  # No numeric pager entries were found; fall back to a single page rather
  # than letting max() of an empty vector break the page loop below.
  warning(
    "No page numbers found in the setlist.fm pager; assuming one page of results.",
    call. = FALSE
  )
  max_pg <- 1L
} else {
  max_pg <- max(page_numbers)
}
# Collect the URL of every show page by walking each page of search results
# and keeping the links whose href contains "setlist/taylor-swift".
swift_set <- map(seq_len(max_pg), function(page) {
  hrefs <- paste0('https://www.setlist.fm/search?artist=3bd6bc5c&page=', page, '&query=tour:%28The+Eras+Tour%29') %>%
    read_html() %>%
    html_nodes('a') %>%
    html_attr('href') %>%
    str_squish()
  show_hrefs <- hrefs[grepl("setlist/taylor-swift", hrefs)]

  # paste0() with a zero-length argument still returns the bare prefix, so a
  # page without matching links must be short-circuited explicitly.
  if (length(show_hrefs) == 0L) {
    return(character(0))
  }
  paste0('https://www.setlist.fm/', show_hrefs)
}) %>%
  unlist() %>%
  # De-duplicate across pages as well as within them so that no show page is
  # requested more than once.
  unique()
# Scrape every show page into a long table with one row per (show, song):
# show_url, show_id, date, location, song_name, song_id, song_number.
swift_songs <- map_dfr(swift_set, function(show_url) {
  # Pause before each request - presumably to avoid hammering the site.
  Sys.sleep(5)
  print(show_url)
  web <- read_html(show_url)

  # Read the song name and id from the same nodes and de-duplicate them as
  # pairs (by name), so the two columns can never drift out of alignment or
  # end up with different lengths.
  song_nodes <- html_nodes(web, '.songLabel')
  songs <- tibble(
    song_name = song_nodes %>%
      html_text() %>%
      str_squish(),
    song_id = song_nodes %>%
      html_attr('href') %>%
      sub(".*=", "", .)
  ) %>%
    distinct(song_name, .keep_all = TRUE)

  tibble(
    show_url = show_url,
    # Everything after the ".../<4-digit year>/" path segment, minus the
    # ".html" suffix; works for any tour year, not only 2023.
    show_id = sub(".*/[0-9]{4}/", "", show_url) %>%
      sub("\\.html$", "", .),
    # "%b" matches abbreviated month names in the current locale.
    date = web %>%
      html_nodes('.dateBlock') %>%
      html_text() %>%
      str_squish() %>%
      as.Date(format = "%b %d %Y"),
    location = web %>%
      html_nodes('.setlistHeadline') %>%
      html_text() %>%
      str_squish() %>%
      sub(" Edit.*", "", .) %>%
      sub("Taylor Swift Setlist at ", "", .),
    song_name = songs$song_name,
    song_id = songs$song_id
  ) %>%
    # Position of the song within the de-duplicated list for this show
    mutate(song_number = row_number())
}) %>%
  distinct()
# Collapse the song-level table to one row per show.
#   * `show` is a display label derived from `location`: ", USA" is removed,
#     the first remaining ", " becomes " (" and a closing ")" is appended.
#   * `songs` is the show's song names, sorted and joined with "| ".
#   * Rows sharing a label are numbered within that label, and the number is
#     appended as " - <n>".
#   * `show` is finally turned into a factor ordered by `date`, and the table
#     is sorted by it.
swift_sum <- swift_songs %>%
  group_by(
    date,
    show = paste0(sub(", ", " (", sub(", USA", "", location)), ")"),
    show_id
  ) %>%
  summarize(songs = paste(sort(song_name), collapse = '| ')) %>%
  ungroup() %>%
  group_by(show) %>%
  mutate(id = row_number()) %>%
  ungroup() %>%
  mutate(show = fct_reorder(paste0(show, " - ", id), date)) %>%
  select(-c(id, show_id, date)) %>%
  arrange(show)
# Register the Tahoma font with the Windows graphics device. windowsFonts()
# only exists on Windows, so the call is skipped elsewhere instead of aborting
# the script with "could not find function".
if (.Platform$OS.type == "windows") {
  suppressWarnings(windowsFonts("Tahoma" = windowsFont("Tahoma")))
}

# Build the show-by-show similarity matrix and draw it as a heat-map.
# For every show i, jarowinkler() compares each show's `songs` string with
# show i's string; the result becomes a column named after show i. The wide
# matrix is then reshaped to long form (show, comp, similarity) for ggplot.
swift_sum %>%
  select(-songs) %>%
  bind_cols(map_dfc(seq_len(nrow(swift_sum)), function(i) {
    swift_sum %>%
      mutate(sim = jarowinkler(songs, songs[i])) %>%
      select(sim) %>%
      # `show` is a factor; name the column with its label
      setNames(as.character(swift_sum$show[i]))
  })) %>%
  pivot_longer(-show, names_to = "comp", values_to = "similarity") %>%
  # Give the y axis the same ordering as the x axis
  mutate(comp = factor(comp, levels = swift_sum$show)) %>%
  ggplot(aes(
    x = show,
    y = comp,
    fill = similarity,
    label = percent(similarity, accuracy = 1),
    # Bold text on the diagonal (a show compared with itself)
    fontface = ifelse(show == comp, "bold", "plain")
  )) +
  geom_tile(color = 'black') +
  geom_text(size = 2.5) +
  scale_fill_gradient(low = '#FFFFFF',
                      high = '#b8396b',
                      labels = percent) +
  scale_x_discrete(position = "top") +
  theme_foundation(base_size = 14, base_family = 'Tahoma') +
  theme(
    axis.text.x = element_text(angle = 45, hjust = -.05),
    legend.position = 'right',
    legend.direction = 'vertical',
    plot.title = element_text(
      face = "bold",
      size = rel(1.2),
      hjust = 0.5
    ),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold"),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line.x = element_line(colour = "black"),
    axis.line.y.left = element_line(colour = "black"),
    axis.line.y.right = element_blank(),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA)
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = 'Jaro-Winkler Score',
    title = "Taylor Swift's Eras Tour Setlist Similarity",
    subtitle = NULL,
    caption = "Chart: Benjamin Robinson (@benj_robinson) | Data: SetList.FM, 2023."
  )

# No `plot` argument is given, so ggsave() writes ggplot2's last_plot(),
# i.e. the chart built above.
ggsave(
  paste0(dir, "tswift.PNG"),
  height = 7.5,
  width = 15,
  units = 'in',
  dpi = 'retina'
)
# Stamp the tour artwork onto the saved chart. The logo file name matches the
# `destfile` used when it was downloaded ("tour.png"), so the lookup also
# works on case-sensitive filesystems.
add_logo(
  plot_path = paste0(dir, "tswift.PNG"),
  logo_path = paste0(dir, "tour.png"),
  save = TRUE
)

# add_logo() re-saves the chart on a fixed 8 x 8 inch canvas; image_trim()
# crops away the edges that are plain background colour before the file is
# written back in place.
image_read(path = paste0(dir, "tswift.PNG")) %>%
  image_trim() %>%
  image_write(path = paste0(dir, "tswift.PNG"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment