Created
April 2, 2023 12:22
-
-
Save benjaminrobinson/dab9cff80c233b9b5d287435dbd6b988 to your computer and use it in GitHub Desktop.
Springsteen Setlist Similarity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(rvest) | |
library(stringr) | |
library(purrr) | |
library(tidyr) | |
library(janitor) | |
library(RecordLinkage) | |
library(ggplot2) | |
library(scales) | |
library(ggthemes) | |
library(magick) | |
## ENTER LOCAL DIRECTORY WITHOUT QUOTE MARKS ##
# Every output path in this script is built as paste0(dir, <file name>), so the
# directory has to end in a path separator. Append one when the user leaves it
# off; an empty answer is kept as-is, so files land in the working directory.
dir <- readline()
if (nzchar(dir) && !grepl("[/\\\\]$", dir)) {
  dir <- paste0(dir, "/")
}
# Fetch the tour artwork that is overlaid on the finished chart. Note that the
# remote file is a .jpg but it is stored under a .png name. mode = 'wb' requests
# a binary transfer so the image is not corrupted on Windows.
download.file(
  url = 'https://blog.ticketmaster.com/wp-content/uploads/BruceSpringsteen_Blog.jpg',
  destfile = paste0(dir, "tour.png"),
  mode = 'wb'
)
#' Overlay a logo image on a saved plot image
#'
#' Reads both image files with magick, scales the logo relative to the plot
#' width, and composites it onto one corner of the plot with 1% padding.
#'
#' @param plot_path Path of the plot image. When `save` is TRUE the composited
#'   result is written back to this same path.
#' @param logo_path Path of the logo image.
#' @param logo_position One of "top right", "top left", "bottom right" or
#'   "bottom left".
#' @param logo_scale Divisor applied to the plot width to obtain the logo
#'   width (the default of 5 gives a logo one fifth as wide as the plot).
#' @param save If TRUE, render the composite through `image_ggplot()` and save
#'   it with `ggsave()` as an 8 x 8 inch image; otherwise draw it on the
#'   current graphics device.
#' @return The value of `ggsave()` when `save` is TRUE, otherwise the value of
#'   `plot()`. Stops with an error when `logo_position` is not recognised.
add_logo <- function(plot_path,
                     logo_path,
                     logo_position = 'top left',
                     logo_scale = 5,
                     save = FALSE) {
  valid_positions <- c("top right", "top left", "bottom right", "bottom left")
  # Validate before touching any file so a bad position fails fast.
  if (!logo_position %in% valid_positions) {
    stop(
      "Error Message: Uh oh! Logo Position not recognized\n Try: logo_position = 'top left', 'top right', 'bottom left', or 'bottom right'"
    )
  }

  # Read in raw images. The plot image gets its own name so it does not shadow
  # the plot() function called at the end.
  plot_img <- image_read(plot_path)
  logo_raw <- image_read(logo_path)

  # Dimensions of the plot, used for scaling and positioning
  plot_dims <- image_info(plot_img)
  plot_height <- plot_dims$height
  plot_width <- plot_dims$width

  # Logo width is plot width / logo_scale (1/5th of the plot by default)
  logo <- image_scale(logo_raw, as.character(plot_width / logo_scale))
  logo_dims <- image_info(logo)
  logo_width <- logo_dims$width
  logo_height <- logo_dims$height

  # Offsets are measured from 0,0 at the top left corner.
  # 0.01 leaves 1% of the plot size as aesthetic padding.
  left_x <- 0.01 * plot_width
  top_y <- 0.01 * plot_height
  right_x <- plot_width - logo_width - 0.01 * plot_width
  bottom_y <- plot_height - logo_height - 0.01 * plot_height
  offsets <- switch(
    logo_position,
    "top right" = c(right_x, top_y),
    "top left" = c(left_x, top_y),
    "bottom right" = c(right_x, bottom_y),
    "bottom left" = c(left_x, bottom_y)
  )

  # Compose the actual overlay
  composite <- image_composite(
    plot_img,
    logo,
    offset = paste0("+", offsets[1], "+", offsets[2])
  )

  if (isTRUE(save)) {
    # Hand the plot to ggsave() explicitly instead of relying on last_plot()
    # happening to point at the object image_ggplot() just created.
    ggsave(
      plot_path,
      plot = image_ggplot(composite),
      height = 8,
      width = 8,
      units = 'in',
      dpi = 'retina'
    )
  } else {
    plot(composite)
  }
}
# Collect the URL of every 2023 setlist linked from the first three pages of
# the artist's setlist.fm listing.
boss_set <- map(1:3, function(page) {
  paste0('https://www.setlist.fm/setlists/bruce-springsteen-2bd6dcce.html?page=',
         page) %>%
    read_html() %>%
    html_nodes('a') %>%
    html_attr('href') %>%
    str_squish() %>%
    .[grepl('setlist/bruce-springsteen/2023/', .)] %>%
    # Presumably the hrefs are relative ("../setlist/...") -- verify against
    # the live page. Strip a leading "../" or "/" explicitly rather than
    # dropping any two characters, so the rebuilt URL has no doubled slash.
    sub("^(\\.\\.)?/+", "", .) %>%
    paste0('https://www.setlist.fm/', .)
}) %>%
  unlist() %>%
  # A link that appears more than once would make the show be scraped twice.
  unique()
# Scrape every setlist page into one long table with a row per song, then give
# each show (unique date / location / set length combination) a numeric id.
boss_songs <- map_dfr(boss_set, function(url) {
  # Pause between requests and echo the URL being fetched.
  Sys.sleep(5)
  print(url)
  web <- read_html(url)

  # Text after the AM/PM marker of the collapsed entry mentioning "End", with
  # the trailing " long" removed. NOTE(review): the result is expected to look
  # like "2h 50m" -- confirm against a live page.
  set_length <- web %>%
    html_nodes('.hiddenCollapsed') %>%
    html_text() %>%
    str_squish() %>%
    unique() %>%
    .[grepl("End", .)] %>%
    sub(".*[A-Z][M]", "", .) %>%
    sub(" long", "", .)

  tibble(
    date = web %>%
      html_nodes('.dateBlock') %>%
      html_text() %>%
      str_squish() %>%
      as.Date(format = "%b %d %Y"),
    location = web %>%
      html_nodes('.setlistHeadline') %>%
      html_text() %>%
      str_squish() %>%
      sub(" Edit.*", "", .) %>%
      sub("Bruce Springsteen Setlist at ", "", .),
    songs = web %>%
      html_nodes('.songPart') %>%
      html_text() %>%
      str_squish(),
    # Indexing with [1] yields NA when no duration was found (a zero-length
    # column would make tibble() fail and abort the whole scrape) and keeps a
    # single value when more than one entry matched.
    set_length = set_length[1]
  ) %>%
    mutate(
      # Position of the song within its own show.
      song_number = row_number(),
      set_hrs = sub("[h].*", "", set_length) %>% as.numeric(),
      set_mins = sub(".*[h] ", "", set_length) %>%
        sub("m", "", .) %>%
        as.numeric()
    )
}) %>%
  group_by(date, location, set_length, set_hrs, set_mins) %>%
  mutate(show_id = cur_group_id()) %>%
  ungroup() %>%
  # Same column order as the former positional select(8, 1:2, 4, 6:7, 5:3),
  # spelled out by name so it survives any change to the columns above.
  select(show_id,
         date,
         location,
         set_length,
         set_hrs,
         set_mins,
         song_number,
         songs)
# Collapse the song table to one row per show. The show label is the location
# with ", USA" removed and the text after the first ", " wrapped in
# parentheses; the songs are sorted and joined with '| ' into a single string.
boss_sum <- boss_songs %>%
  mutate(show = paste0(sub(", ", " (", sub(", USA", "", location)), ")")) %>%
  group_by(show, show_id) %>%
  summarize(songs = paste(sort(songs), collapse = '| ')) %>%
  ungroup() %>%
  select(-show_id)
# windowsFonts()/windowsFont() are only available in Windows builds of R, and
# suppressWarnings() does not silence the resulting "could not find function"
# error elsewhere, so register the font only when running on Windows.
if (.Platform$OS.type == "windows") {
  suppressWarnings(windowsFonts("Tahoma" = windowsFont("Tahoma")))
}
#' Chart theme layered on top of theme_foundation()
#'
#' @param base_size Base font size handed to `theme_foundation()`.
#' @param base_family Base font family handed to `theme_foundation()`.
#' @return The foundation theme with this file's overrides added: centred bold
#'   title, black x and left-hand y axis lines, light grey major grid, no
#'   minor grid, and a horizontal legend at the bottom.
theme_gtm <- function(base_size = 14,
                      base_family = "Tahoma") {
  # Element settings that are reused several times below.
  light_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)
  black_line <- element_line(colour = "black")
  centred_text <- element_text(hjust = 0.5)
  bold_text <- element_text(face = "bold")

  foundation <- theme_foundation(base_size = base_size,
                                 base_family = base_family)

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line.x = black_line,
    axis.line.y.left = black_line,
    axis.line.y.right = element_blank(),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = bold_text,
    plot.margin = unit(c(15, 5, 5, 5), "mm"),
    plot.subtitle = centred_text,
    plot.caption = centred_text,
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = bold_text
  )

  foundation + overrides
}
# Heat map of pairwise setlist similarity. For every show a column is added
# holding the Jaro-Winkler score between each show's joined song string and
# that show's string; the wide table is then reshaped to one row per
# (show, comp) pair and drawn as labelled tiles.
# NOTE(review): the show labels become column names, so this assumes every
# label in boss_sum$show is unique -- confirm for venues with several nights.
boss_sum %>%
  select(-songs) %>%
  # seq_len() rather than 1:nrow() so an empty table does not iterate over
  # c(1, 0).
  bind_cols(map_dfc(seq_len(nrow(boss_sum)), function(i) {
    # Name the column with the plain label string; the previous code passed a
    # 1 x 1 tibble to setNames() and relied on it being coerced.
    tibble(sim = jarowinkler(boss_sum$songs, boss_sum$songs[i])) %>%
      setNames(boss_sum$show[i])
  })) %>%
  pivot_longer(-show, names_to = "comp", values_to = "similarity") %>%
  ggplot(aes(
    x = show,
    y = comp,
    fill = similarity,
    label = percent(similarity, accuracy = .1),
    # Bold labels on the diagonal, where a show is compared with itself.
    fontface = ifelse(show == comp, "bold", "plain")
  )) +
  geom_tile(color = 'black') +
  geom_text(size = 3) +
  scale_fill_gradient(low = '#FFFFFF',
                      high = '#f44c3c',
                      labels = percent) +
  scale_x_discrete(position = "top") +
  theme_foundation(base_size = 14, base_family = 'Tahoma') +
  theme(
    axis.text.x = element_text(angle = 45, hjust = -.05),
    legend.position = 'right',
    legend.direction = 'vertical',
    plot.title = element_text(
      face = "bold",
      size = rel(1.2),
      hjust = 0.5
    ),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold"),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line.x = element_line(colour = "black"),
    axis.line.y.left = element_line(colour = "black"),
    axis.line.y.right = element_blank(),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA)
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = 'Jaro-Winkler Score',
    title = "Bruce Springsteen 2023 Tour Setlist Similarity",
    subtitle = NULL,
    caption = "Chart: Benjamin Robinson (@benj_robinson) | Data: SetList.FM, 2023."
  )
# Write the chart to disk. No plot is passed, so ggsave() falls back to the
# most recently created ggplot.
ggsave(
  paste0(dir, "bruce.PNG"),
  height = 7.5,
  width = 15,
  units = 'in',
  dpi = 'retina'
)
# Stamp the tour artwork onto the saved chart. The logo was downloaded as
# "tour.png"; reading "tour.PNG" fails on case-sensitive file systems, so the
# name here matches the download exactly.
add_logo(plot_path = paste0(dir, "bruce.PNG"),
         logo_path = paste0(dir, "tour.png"),
         save = TRUE)
# add_logo() re-saves the chart as an 8 x 8 inch image; trim the surrounding
# border and overwrite the file in place.
image_read(path = paste0(dir, "bruce.PNG")) %>%
  image_trim() %>%
  image_write(path = paste0(dir, "bruce.PNG"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment