Created
September 13, 2021 00:19
-
-
Save farach/ad5927ccdcdad68d2cb6f0e4fe849828 to your computer and use it in GitHub Desktop.
Bruce Springsteen music analysis plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(spotifyr)
library(tidyverse)
library(lubridate)
library(glue)
library(geniusr)
library(rvest)
library(tidytext)
#' Custom ggplot2 theme for the plots in this script.
#'
#' Starts from `theme_minimal()` and layers on top of it: horizontal-only
#' major grid lines, no minor grid or axis ticks, white facet strips, and
#' purple Arial text for the title, subtitle, caption and axes.
#'
#' @return A ggplot2 theme object that can be added to a plot with `+`.
theme_alex <- function() {
  font <- "Arial"

  # The `+` matters: without it the value of `theme_minimal()` is thrown away
  # and only the partial `theme()` call below is returned.
  theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_line(
        color = "#cbcbcb"
      ),
      panel.grid.major.x = element_blank(),
      panel.background = element_blank(),
      strip.background = element_rect(
        fill = "white"
      ),
      strip.text = element_text(
        hjust = 0,
        color = "#460069",
        size = 12
      ),
      axis.ticks = element_blank(),
      plot.title = element_text(
        family = font,
        size = 20,
        face = "bold",
        color = "#460069"
      ),
      plot.subtitle = element_text(
        family = font,
        size = 14,
        color = "#6a1c91",
        hjust = 0.5
      ),
      plot.caption = element_text(
        family = font,
        size = 9,
        hjust = 1,
        color = "#460069"
      ),
      axis.title = element_text(
        family = font,
        size = 10,
        color = "#460069"
      ),
      axis.text = element_text(
        family = font,
        size = 9,
        color = "#460069"
      ),
      # Extra space above (5) and below (10) the x-axis labels.
      axis.text.x = element_text(
        margin = margin(5, b = 10)
      ),
      legend.text.align = 0,
      legend.background = element_blank()
    )
}
# Spotify ----------------------------------------------------------------------
# The "....." values stand in for real Spotify API credentials.
Sys.setenv(
  SPOTIFY_CLIENT_ID = ".....",
  SPOTIFY_CLIENT_SECRET = "....."
)
access_token <- get_spotify_access_token()

# Audio features for every Bruce Springsteen track Spotify returns.
bs_spotify <- get_artist_audio_features("bruce springsteen")

# Printed overview: one row per release year and snake_cased, lower-cased
# album name.
bs_spotify %>%
  distinct(album_release_year, album_name) %>%
  arrange(album_release_year) %>%
  transmute(
    year = album_release_year,
    album_name = album_name %>%
      str_replace_all(" ", "_") %>%
      str_to_lower()
  ) %>%
  distinct()
# Genius -----------------------------------------------------------------------
library(genius)

# Album titles exactly as they are passed to `genius_album()`.
bs_albums <- c(
  "Greetings from Asbury Park, N.J.",
  "The Wild, the Innocent & the E Street Shuffle",
  "Born To Run",
  "Darkness On the Edge of Town",
  "The River",
  "Nebraska",
  "Born in the U.S.A.",
  "Tunnel of Love",
  "Human Touch",
  "Lucky Town",
  "The Ghost of Tom Joad",
  "The Rising",
  "Devils Dust",
  "We Shall Overcome: The Seeger Sessions",
  "Magic",
  "Working on a Dream",
  "Wrecking Ball",
  "High Hopes",
  "Western Stars",
  "Letter to You"
)

# Fetch each album's lyrics, tag the rows with the album title that was
# requested, and bind everything once at the end. This avoids re-copying a
# growing data frame on every iteration and leaves no loop temporaries in the
# global environment.
bs_genius <- bs_albums %>%
  map(function(album_query) {
    genius_album(artist = "Bruce Springsteen", album = album_query) %>%
      mutate(album = album_query)
  }) %>%
  bind_rows()
# Combine Genius and Spotify ---------------------------------------------------
bs_studio <- bs_spotify %>%
  mutate(album_name = str_trim(album_name, side = "both")) %>%
  # Keep only these albums; titles here use the capitalisation this filter
  # matches against the Spotify `album_name` column.
  filter(
    album_name %in% c(
      "Greetings from Asbury Park, N.J.",
      "The Wild, the Innocent & The E Street Shuffle",
      "Born To Run",
      "Darkness On the Edge of Town",
      "The River",
      "Nebraska",
      "Born In The U.S.A.",
      "Tunnel Of Love",
      "Human Touch",
      "Lucky Town",
      "The Ghost Of Tom Joad",
      "The Rising",
      "Devils & Dust",
      "We Shall Overcome: The Seeger Sessions (American Land Edition)",
      "Magic",
      "Working On A Dream",
      "Wrecking Ball",
      "High Hopes",
      "Western Stars",
      "Letter To You"
    )
  ) %>%
  mutate(
    track_name_original = track_name,
    # Normalised key: lower-case, trimmed, no spaces, no punctuation.
    track_name = track_name %>%
      str_to_lower() %>%
      str_trim(side = "both") %>%
      str_replace_all(" ", "") %>%
      str_remove_all("[:punct:]"),
    track_number = as.numeric(track_number),
    # "<number> - <key>" label, ordered by descending track number within
    # each album.
    track_title = reorder_within(
      str_to_title(glue("{track_number} - {track_name}")),
      -track_number,
      within = album_name
    )
    # album_name = paste(album_release_year, "-", album_name)
  ) %>%
  # No `by` is given, so the join uses the columns the two tables share.
  left_join(
    bs_genius %>%
      # NOTE(review): this literal has no trailing period, whereas `bs_albums`
      # lists "Born in the U.S.A." -- so this filter looks like it keeps every
      # row; confirm the intent.
      filter(album != "Born in the U.S.A") %>%
      distinct() %>%
      mutate(
        # Same normalisation as the Spotify side, built from the Genius title.
        track_name = track_title %>%
          str_to_lower() %>%
          str_trim(side = "both") %>%
          str_replace_all(" ", "") %>%
          str_remove_all("[:punct:]")
      ) %>%
      select(-track_title)
  )
# Vader ------------------------------------------------------------------------
library(vader)

# Run `get_vader()` on every `lyric` value and spread the fields it returns
# into separate columns.
bs_vader <- unnest_wider(
  mutate(bs_studio, vader = map(lyric, ~ get_vader(.x))),
  vader
)
# Billboard charts -------------------------------------------------------------
page <- read_html("https://en.wikipedia.org/wiki/Bruce_Springsteen_discography")

# Take the first `.wikitable` on the page, promote its first data row to
# column names, keep rows 1-20 (presumably the studio albums -- confirm), and
# rename the cleaned `title` column so it lines up with `album_name`.
albums <- page %>%
  html_node(".wikitable") %>%
  html_table(fill = TRUE, header = TRUE) %>%
  janitor::row_to_names(1) %>%
  .[1:20, ] %>%
  janitor::clean_names() %>%
  rename(album_name = title)
# Join all together ------------------------------------------------------------
library(fuzzyjoin)

# NOTE(review): `bs_sentiment` is not assigned anywhere in the visible part of
# this script. Later code reads both `compound` and `ave_sentiment` from the
# joined result, so it is presumably `bs_vader` with an extra `ave_sentiment`
# column -- confirm where it is created.
#
# Album names are matched case-insensitively with a maximum string distance
# of 2; the Spotify edition suffix is removed first so that album can match.
bs_full <- bs_sentiment %>%
  mutate(
    album_name = ifelse(
      album_name ==
        "We Shall Overcome: The Seeger Sessions (American Land Edition)",
      "We Shall Overcome: The Seeger Sessions",
      album_name
    )
  ) %>%
  stringdist_join(
    albums,
    by = c("album_name" = "album_name"),
    max_dist = 2,
    distance_col = "distance",
    ignore_case = TRUE
  ) %>%
  # Keep the left-hand album name under its plain column name.
  rename(album_name = album_name.x) %>%
  select(-album_name.y)
# Subjective list of best Bruce Springsteen songs ------------------------------
artist_url <- "https://www.nj.com/entertainment/music/2017/10/bruce_springsteen_songs_ranked_springsteen_on_broa.html"

# Scrape the article paragraphs and keep those that begin with a number,
# then split each one into rank, track and album.
track_album_list <- artist_url %>%
  read_html() %>%
  html_nodes(".article__paragraph--left") %>%
  html_text() %>%
  tibble() %>%
  janitor::clean_names() %>%
  filter(str_detect(x, "^\\d+")) %>%
  transmute(
    rank = str_extract(x, "^\\d+"),
    # NOTE(review): "[:digit:]." removes every digit plus the character after
    # it anywhere in the text, not only the leading rank, and the following
    # "." pattern removes whichever character comes first -- confirm this is
    # intended for titles that contain digits.
    track_name = x %>%
      str_remove_all("[:digit:].") %>%
      str_remove(".") %>%
      str_remove_all('"'),
    # Text before the first "-" is treated as the track, the next piece as
    # the album.
    album_name_clean = str_split(track_name, "-", simplify = TRUE)[, 2],
    track_name_clean = str_split(track_name, "-", simplify = TRUE)[, 1],
    album_name = album_name_clean %>%
      str_to_lower() %>%
      str_trim(side = "both") %>%
      str_replace_all(" ", "_"),
    # Same normalised key format as `track_name` in `bs_studio`.
    track_name = track_name_clean %>%
      str_to_lower() %>%
      str_trim(side = "both") %>%
      str_replace_all(" ", "") %>%
      str_remove_all("[:punct:]")
  )

# Keep the best-ranked track of each album and attach its display name to the
# full data set; rows of `bs_full` without a match are retained.
bs_full2 <- right_join(
  track_album_list %>%
    mutate(rank = as.numeric(rank)) %>%
    group_by(album_name_clean) %>%
    slice_min(rank, n = 1) %>%
    ungroup() %>%
    distinct(track_name_clean, track_name),
  bs_full
)
# Plot prep --------------------------------------------------------------------
# Quadrant labels for the valence/energy plane, one per corner. `album_name`
# holds a single album label for all four rows -- presumably so that, once the
# plot is faceted by album, the labels appear only in that panel (confirm).
vibe_df <- tibble(
  vibe = c(
    "Turbulent/Angry",
    "Happy/Joyful",
    "Sad/Depressing",
    "Chill/Peaceful"
  ),
  energy = c(0.6, 0.6, 0.4, 0.4),
  valence = c(0.15, 0.87, 0.15, 0.87),
  album_name = "1973 - Greetings from Asbury Park, N.J."
)
# One row per track for plotting: numeric sentiment columns, a year-prefixed
# album label, a per-album ordered track label, and the average VADER
# compound score.
bs_p <- bs_full2 %>%
  mutate(
    ave_vader = as.numeric(compound),
    track_number = as.numeric(track_number),
    ave_sentiment = as.numeric(ave_sentiment),
    album_name = glue("{album_release_year} - {album_name}"),
    track_title = reorder_within(
      str_to_title(glue("{track_number} - {track_name_original}")),
      -track_number,
      within = album_name
    ),
    # Milliseconds to minutes; the column keeps its original name.
    duration_ms = duration_ms / 60000
  ) %>%
  group_by(
    track_title, energy, valence, album_name, duration_ms, ave_sentiment,
    danceability, track_name_clean
  ) %>%
  summarise(ave_vader = mean(ave_vader, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(
    track_title, energy, valence, album_name, duration_ms, ave_sentiment,
    ave_vader, danceability, track_name_clean
  ) %>%
  # Tracks with no usable score are given 0.
  mutate(ave_vader = if_else(is.na(ave_vader), 0, ave_vader)) %>%
  # If a track label still occurs more than once, keep its highest-energy row.
  group_by(track_title) %>%
  slice_max(order_by = energy, n = 1, with_ties = FALSE) %>%
  ungroup()
# Plot -------------------------------------------------------------------------
# Valence (x) against energy (y) for every track, one panel per album. Point
# size shows song length in minutes and colour shows the VADER lyric score.
bs_p1 <- bs_p %>%
  ggplot() +
  # Fixed reference lines at 0.5 on each axis. They are passed as plain
  # parameters rather than through aes() so that one line is drawn per panel
  # instead of one per data row.
  geom_vline(xintercept = 0.5) +
  geom_hline(yintercept = 0.5) +
  geom_point(
    aes(y = energy, x = valence, size = duration_ms, color = ave_vader),
    alpha = 0.75
  ) +
  # Quadrant labels come from their own data frame.
  geom_text(
    aes(y = energy, x = valence, label = vibe),
    data = vibe_df, size = 3.5,
    color = "#460069"
  ) +
  # geom_text(
  #   aes(y = energy, x = valence, label = track_name_clean),
  #   show.legend = FALSE, na.rm = TRUE
  # ) +
  facet_wrap(~album_name,
    labeller = label_wrap_gen(width = 45, multi_line = TRUE)
  ) +
  theme_alex() +
  # Plot-specific overrides on top of the shared theme.
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#cbcbcb"
    ),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5),
    # legend.text.align = 0,
    legend.background = element_blank(),
    legend.title = element_text(
      color = "#460069"
    ),
    legend.key = element_blank(),
    legend.text = element_text(
      color = "#460069"
    ),
    axis.title.x = element_text(hjust = 1, margin = margin(5, 0, 0, 0)),
    axis.title.y = element_text(hjust = 1, margin = margin(0, 15, 0, 0))
  ) +
  scale_color_viridis_c(
    option = "magma"
  ) +
  scale_size_continuous(range = c(0, 8)) +
  labs(
    x = "Valence",
    y = "Energy",
    title = "The music of Bruce Springsteen",
    subtitle = "Valence describes the musical positiveness conveyed by a track.\nEnergy represents a perceptual measure of intensity and activity.\nVADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool.\nA high sentiment score indicates positive lyrics.",
    caption = "Source:\nGenius.com\nSpotify.com",
    color = "VADER lyric\nsentiment\nscore:",
    size = "Song length\n(minutes):"
  )

bs_p1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Made public