Skip to content

Instantly share code, notes, and snippets.

@benjaminrobinson
Last active June 10, 2023 21:36
Show Gist options
  • Save benjaminrobinson/7d749d1d4d05e9998c58393a62a5f275 to your computer and use it in GitHub Desktop.
Save benjaminrobinson/7d749d1d4d05e9998c58393a62a5f275 to your computer and use it in GitHub Desktop.
Taylor Swift Eras Tour Setlist Similarity
library(dplyr)
library(rvest)
library(stringr)
library(purrr)
library(tidyr)
library(janitor)
library(RecordLinkage)
library(ggplot2)
library(scales)
library(ggthemes)
library(magick)
library(forcats)
## ENTER LOCAL DIRECTORY WITHOUT QUOTE MARKS ##
# Every path in this script is built as paste0(dir, "<file name>"), so the
# directory has to end with a path separator. Append one when the user leaves
# it off; otherwise files land next to the directory instead of inside it.
dir <- readline()
if (nzchar(dir) && !grepl("[/\\\\]$", dir)) {
  dir <- paste0(dir, "/")
}
# Download the tour artwork into the working directory.
# mode = 'wb' writes the image as binary so it is not corrupted on Windows.
download.file(
  url = 'https://i.iheart.com/v3/re/new_assets/6372707fb66c34b0905cb19d?ops=gravity(%22north%22),fit(1200,675),quality(65)',
  destfile = paste0(dir, "tour.png"),
  mode = 'wb'
)
#' Overlay a logo image onto a saved plot image
#'
#' Reads both images with magick, scales the logo relative to the plot width,
#' places it in one corner with 1% padding, and either saves or displays the
#' composited result.
#'
#' @param plot_path Path to the plot image. When `save = TRUE` the composited
#'   image is written back to this same path.
#' @param logo_path Path to the logo image.
#' @param logo_position One of "top left", "top right", "bottom left" or
#'   "bottom right".
#' @param logo_scale Divisor applied to the plot width to obtain the logo
#'   width; the default of 5 makes the logo 1/5th as wide as the plot.
#' @param save If `TRUE`, write the result to `plot_path` via `ggsave()`
#'   (8 x 8 in, retina dpi); otherwise draw it on the current graphics device.
add_logo <- function(plot_path,
                     logo_path,
                     logo_position = 'top left',
                     logo_scale = 5,
                     save = FALSE) {
  valid_positions <- c("top right", "top left", "bottom right", "bottom left")
  if (length(logo_position) != 1 || !logo_position %in% valid_positions) {
    stop(
      "Error Message: Uh oh! Logo Position not recognized\n Try: logo_position = 'top left', 'top right', 'bottom left', or 'bottom right'"
    )
  }

  # Read in raw images
  plot_img <- image_read(plot_path)
  logo_raw <- image_read(logo_path)

  # Plot dimensions drive both the logo size and its placement
  plot_dims <- image_info(plot_img)
  plot_height <- plot_dims$height
  plot_width <- plot_dims$width

  # Logo width is plot width / logo_scale (1/5th of the plot by default)
  logo <- image_scale(logo_raw, as.character(plot_width / logo_scale))
  logo_dims <- image_info(logo)
  logo_width <- logo_dims$width
  logo_height <- logo_dims$height

  # Offsets are measured from the top-left corner (0, 0).
  # 1% of each dimension is used as aesthetic padding.
  pad_x <- 0.01 * plot_width
  pad_y <- 0.01 * plot_height
  x_pos <- if (grepl("right", logo_position, fixed = TRUE)) {
    plot_width - logo_width - pad_x
  } else {
    pad_x
  }
  y_pos <- if (grepl("bottom", logo_position, fixed = TRUE)) {
    plot_height - logo_height - pad_y
  } else {
    pad_y
  }

  # Geometry offsets are pixel counts: round them and always emit an explicit
  # sign so the string stays a well-formed "+x+y" / "-x-y" offset.
  offset <- sprintf("%+d%+d", as.integer(round(x_pos)), as.integer(round(y_pos)))
  composed <- image_composite(plot_img, logo, offset = offset)

  if (isTRUE(save)) {
    # Pass the plot explicitly instead of relying on ggplot2's last_plot()
    ggsave(
      plot_path,
      plot = image_ggplot(composed),
      height = 8,
      width = 8,
      units = 'in',
      dpi = 'retina'
    )
  } else {
    plot(composed)
  }
}
# Determine how many result pages the Eras Tour search has by inspecting the
# pager on one of the result pages.
pager <- 'https://www.setlist.fm/search?artist=3bd6bc5c&page=2&query=tour:%28The+Eras+Tour%29' %>%
  read_html() %>%
  html_nodes('.listPager-lg')

# Page numbers taken from the pager links' "page=N" query parameter. Unlike
# the per-character split below, this can represent page numbers >= 10.
linked_pages <- pager %>%
  html_nodes('a') %>%
  html_attr('href') %>%
  str_match('page=(\\d+)') %>%
  .[, 2] %>%
  as.numeric()

# Original approach: every single character of the pager text read as a digit.
# Kept as a second source because a page shown in the pager without a link
# would not appear in linked_pages. Non-digit characters become NA (the
# coercion warning is expected) and are dropped below.
digit_pages <- suppressWarnings(
  pager %>%
    html_text() %>%
    str_squish() %>%
    str_split('') %>%
    unlist() %>%
    as.numeric()
)

page_candidates <- c(linked_pages, digit_pages)
page_candidates <- page_candidates[!is.na(page_candidates)]
if (length(page_candidates) == 0) {
  stop("Could not determine the number of result pages from the setlist.fm pager",
       call. = FALSE)
}
max_pg <- max(page_candidates)
# Collect the URL of every Taylor Swift setlist linked from the search result
# pages 1..max_pg.
swift_set <- map(seq_len(max_pg), function(page) {
  paste0('https://www.setlist.fm/search?artist=3bd6bc5c&page=', page, '&query=tour:%28The+Eras+Tour%29') %>%
    read_html() %>%
    html_nodes('a') %>%
    html_attr('href') %>%
    str_squish() %>%
    unique() %>%
    .[grepl("setlist/taylor-swift", .)] %>%
    # str_c() returns character(0) when a page has no matching links, whereas
    # paste0() would emit the bare prefix as a bogus URL.
    str_c('https://www.setlist.fm/', .)
}) %>%
  unlist() %>%
  # Drop setlists linked from more than one page so each is fetched only once
  unique()
# Scrape every setlist page into one row per (show, song).
# Columns: show_url, show_id, date, location, song_name, song_id, song_number.
swift_songs <- map_dfr(swift_set, function(show_url) {
  # Pause between requests
  Sys.sleep(5)
  print(show_url)
  page <- read_html(show_url)

  # Read name and id from the same nodes and de-duplicate them as pairs.
  # De-duplicating each column on its own can leave the two vectors with
  # different lengths (tibble() error) or silently mispair names and ids.
  song_nodes <- html_nodes(page, '.songLabel')
  songs <- tibble(
    song_name = song_nodes %>%
      html_text() %>%
      str_squish(),
    song_id = song_nodes %>%
      html_attr('href') %>%
      sub(".*=", "", .)
  ) %>%
    distinct()

  tibble(
    show_url = show_url,
    # Last path segment without the extension; not tied to a particular year
    show_id = sub(".*/", "", show_url) %>%
      sub("\\.html$", "", .),
    date = page %>%
      html_nodes('.dateBlock') %>%
      html_text() %>%
      str_squish() %>%
      as.Date(format = "%b %d %Y"),
    location = page %>%
      html_nodes('.setlistHeadline') %>%
      html_text() %>%
      str_squish() %>%
      sub(" Edit.*", "", .) %>%
      sub("Taylor Swift Setlist at ", "", .),
    song_name = songs$song_name,
    song_id = songs$song_id
  ) %>%
    mutate(
      song_number = row_number()
    )
}) %>%
  distinct()
# Reduce the song-level table to one row per show: a display label `show`
# (a factor ordered by date) and `songs`, the show's song names sorted and
# joined with '| '.
swift_sum <- swift_songs %>%
  # "Venue, City, ST, USA" -> "Venue (City, ST)"
  mutate(show = paste0(sub(", ", " (", sub(", USA", "", location)), ")")) %>%
  group_by(date, show, show_id) %>%
  summarize(songs = paste(sort(song_name), collapse = '| ')) %>%
  ungroup() %>%
  # Number repeat nights at the same venue so every label is unique
  group_by(show) %>%
  mutate(id = row_number()) %>%
  ungroup() %>%
  mutate(show = forcats::fct_reorder(paste0(show, " - ", id), date)) %>%
  select(-id, -show_id, -date) %>%
  arrange(show)
# windowsFonts() only exists in Windows builds of R; calling it elsewhere is an
# error (which suppressWarnings() does not catch), so register the font only
# where the function is available.
if (.Platform$OS.type == "windows") {
  suppressWarnings(windowsFonts("Tahoma" = windowsFont("Tahoma")))
}

# One column per show: the Jaro-Winkler similarity of every show's joined
# song string against that show's string. Columns are named by show label.
similarity_cols <- map_dfc(seq_len(nrow(swift_sum)), function(i) {
  swift_sum %>%
    mutate(sim = jarowinkler(songs, songs[i])) %>%
    select(sim) %>%
    setNames(as.character(swift_sum$show[i]))
})

# Reshape to long form (show, comp, similarity) and draw the heatmap.
# The plot is left unassigned so it is printed and becomes ggplot2's last plot.
swift_sum %>%
  select(-songs) %>%
  bind_cols(similarity_cols) %>%
  pivot_longer(-show, names_to = "comp", values_to = "similarity") %>%
  # Same ordering on both axes
  mutate(comp = factor(comp, levels = as.character(swift_sum$show))) %>%
  ggplot(aes(
    x = show,
    y = comp,
    fill = similarity,
    label = percent(similarity, accuracy = 1),
    # Bold the diagonal (a show compared with itself)
    fontface = ifelse(show == comp, "bold", "plain")
  )) +
  geom_tile(color = 'black') +
  geom_text(size = 2.5) +
  scale_fill_gradient(low = '#FFFFFF',
                      high = '#b8396b',
                      labels = percent) +
  scale_x_discrete(position = "top") +
  theme_foundation(base_size = 14, base_family = 'Tahoma') +
  theme(
    axis.text.x = element_text(angle = 45, hjust = -.05),
    legend.position = 'right',
    legend.direction = 'vertical',
    plot.title = element_text(
      face = "bold",
      size = rel(1.2),
      hjust = 0.5
    ),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold"),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line.x = element_line(colour = "black"),
    axis.line.y.left = element_line(colour = "black"),
    axis.line.y.right = element_blank(),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA)
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = 'Jaro-Winkler Score',
    title = "Taylor Swift's Eras Tour Setlist Similarity",
    subtitle = NULL,
    caption = "Chart: Benjamin Robinson (@benj_robinson) | Data: SetList.FM, 2023."
  )
# Write the most recently created ggplot to disk (ggsave() defaults to
# ggplot2's last plot).
chart_path <- paste0(dir, "tswift.PNG")
ggsave(
  chart_path,
  height = 7.5,
  width = 15,
  units = 'in',
  dpi = 'retina'
)
# Stamp the tour artwork onto the saved chart. The logo was downloaded as
# "tour.png"; the file name must match its case exactly or the read fails on
# case-sensitive filesystems.
add_logo(plot_path = chart_path,
         logo_path = paste0(dir, "tour.png"),
         save = TRUE)
# Trim the border left around the image by the 8 x 8 in re-save in add_logo()
chart_path %>%
  image_read() %>%
  image_trim() %>%
  image_write(path = chart_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment