Skip to content

Instantly share code, notes, and snippets.

@farach
Created September 13, 2021 00:19
Show Gist options
  • Save farach/ad5927ccdcdad68d2cb6f0e4fe849828 to your computer and use it in GitHub Desktop.
Save farach/ad5927ccdcdad68d2cb6f0e4fe849828 to your computer and use it in GitHub Desktop.
Bruce Springsteen music analysis plot
library(spotifyr)
library(tidyverse)
library(lubridate)
library(glue)
library(geniusr)
library(rvest)
library(tidytext)
# Create a ggplot2 theme
#
# Builds the house theme for this script: theme_minimal() with purple Arial
# text, horizontal-only major grid lines and no axis ticks layered on top.
# Takes no arguments and returns a ggplot2 theme object that can be added to
# a plot with `+`.
theme_alex <- function() {
  font <- "Arial"

  # The `+` is essential: without it the value of theme_minimal() is
  # discarded and only the partial theme() below is returned.
  theme_minimal() +
    theme(
      # Grid: keep only horizontal major lines.
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_line(
        color = "#cbcbcb"
      ),
      panel.grid.major.x = element_blank(),
      panel.background = element_blank(),
      # Facet strips: white background, left-aligned label.
      strip.background = element_rect(
        fill = "white"
      ),
      strip.text = element_text(
        hjust = 0,
        color = "#460069",
        size = 12
      ),
      axis.ticks = element_blank(),
      # Titles and captions.
      plot.title = element_text(
        family = font,
        size = 20,
        face = "bold",
        color = "#460069"
      ),
      plot.subtitle = element_text(
        family = font,
        size = 14,
        color = "#6a1c91",
        hjust = 0.5
      ),
      plot.caption = element_text(
        family = font,
        size = 9,
        hjust = 1,
        color = "#460069"
      ),
      # Axes.
      axis.title = element_text(
        family = font,
        size = 10,
        color = "#460069"
      ),
      axis.text = element_text(
        family = font,
        size = 9,
        color = "#460069"
      ),
      axis.text.x = element_text(
        margin = margin(5, b = 10)
      ),
      # Legend.
      legend.text.align = 0,
      legend.background = element_blank()
    )
}
# Spotify ----------------------------------------------------------------------
# Credentials are read by spotifyr from these environment variables; replace
# the placeholders before running.
Sys.setenv(SPOTIFY_CLIENT_ID = ".....")
Sys.setenv(SPOTIFY_CLIENT_SECRET = ".....")

# NOTE(review): `access_token` is not passed to any later call in this script.
access_token <- get_spotify_access_token()

# Audio features for the artist's tracks as returned by spotifyr.
bs_spotify <- get_artist_audio_features("bruce springsteen")

# Printed overview: one row per (release year, album), oldest first, with the
# album name lower-cased and spaces replaced by underscores.
bs_spotify %>%
  distinct(album_release_year, album_name) %>%
  arrange(album_release_year) %>%
  rename(year = album_release_year) %>%
  mutate(
    album_name = album_name %>%
      str_replace_all(" ", "_") %>%
      str_to_lower()
  ) %>%
  distinct()
# Genius -----------------------------------------------------------------------
library(genius)

# Studio albums to request, spelled the way they are passed to genius_album().
bs_albums <- c(
  "Greetings from Asbury Park, N.J.",
  "The Wild, the Innocent & the E Street Shuffle",
  "Born To Run",
  "Darkness On the Edge of Town",
  "The River",
  "Nebraska",
  "Born in the U.S.A.",
  "Tunnel of Love",
  "Human Touch",
  "Lucky Town",
  "The Ghost of Tom Joad",
  "The Rising",
  "Devils Dust",
  "We Shall Overcome: The Seeger Sessions",
  "Magic",
  "Working on a Dream",
  "Wrecking Ball",
  "High Hopes",
  "Western Stars",
  "Letter to You"
)

# Fetch one tibble per album, tag each with the requested album title, and
# bind them once at the end instead of growing a data frame inside a loop.
bs_genius <- bs_albums %>%
  map(function(album_title) {
    genius_album(artist = "Bruce Springsteen", album = album_title) %>%
      mutate(album = album_title)
  }) %>%
  bind_rows()
# Combine Genius and Spotify ---------------------------------------------------
# Keep only the studio albums (spelled the way the Spotify data spells them),
# derive a squashed join key from the track name, and attach the Genius rows.
bs_studio <- bs_spotify %>%
  mutate(album_name = str_trim(album_name, "both")) %>%
  filter(
    album_name %in% c(
      "Greetings from Asbury Park, N.J.",
      "The Wild, the Innocent & The E Street Shuffle",
      "Born To Run",
      "Darkness On the Edge of Town",
      "The River",
      "Nebraska",
      "Born In The U.S.A.",
      "Tunnel Of Love",
      "Human Touch",
      "Lucky Town",
      "The Ghost Of Tom Joad",
      "The Rising",
      "Devils & Dust",
      "We Shall Overcome: The Seeger Sessions (American Land Edition)",
      "Magic",
      "Working On A Dream",
      "Wrecking Ball",
      "High Hopes",
      "Western Stars",
      "Letter To You"
    )
  ) %>%
  mutate(
    # Preserve the display name before it is turned into a join key.
    track_name_original = track_name,
    # Join key: lower-case, trimmed, no spaces, no punctuation.
    track_name = str_to_lower(track_name) %>%
      str_trim("both") %>%
      str_remove_all(" ") %>%
      str_remove_all("[:punct:]"),
    track_number = as.numeric(track_number),
    # Built from the squashed key; ordered by descending track number within
    # each album.
    track_title = glue("{track_number} - {track_name}") %>%
      str_to_title() %>%
      reorder_within(-track_number, within = album_name)
  ) %>%
  # No `by` is given, so dplyr joins on every column name the two tables
  # share -- presumably just `track_name`; confirm against the join message.
  left_join(
    bs_genius %>%
      # NOTE(review): this literal has no trailing period, while the album is
      # requested as "Born in the U.S.A." -- as written the filter appears to
      # drop nothing. Confirm whether excluding that album was intended.
      filter(album != "Born in the U.S.A") %>%
      distinct() %>%
      mutate(
        # Same squashing as the Spotify side so the keys line up.
        track_name = str_to_lower(track_title) %>%
          str_trim("both") %>%
          str_remove_all(" ") %>%
          str_remove_all("[:punct:]")
      ) %>%
      select(-track_title)
  )
# Vader ------------------------------------------------------------------------
library(vader)

# Score every lyric value with get_vader() and spread each result into its
# own columns.
bs_vader <- bs_studio %>%
  mutate(vader = lapply(lyric, get_vader)) %>%
  unnest_wider(vader)
# Billboard charts -------------------------------------------------------------
page <- read_html("https://en.wikipedia.org/wiki/Bruce_Springsteen_discography")

# Take the first ".wikitable" on the page, promote its first data row to the
# header, keep rows 1 to 20, and normalise the column names so the title
# column can be renamed to `album_name`.
albums <- page %>%
  html_node(".wikitable") %>%
  html_table(fill = TRUE, header = TRUE) %>%
  janitor::row_to_names(1) %>%
  .[1:20, ] %>%
  janitor::clean_names() %>%
  rename(album_name = title)
# Join all together ------------------------------------------------------------
library(fuzzyjoin)

# NOTE(review): `bs_sentiment` is not created anywhere in the visible script
# (the Vader step produces `bs_vader`); it must exist before this runs.

# Fuzzy-join the track data to the chart table on album name, allowing a
# maximum string distance of 2 and ignoring case. The Spotify edition suffix
# on the Seeger Sessions album is removed first so the names are close enough.
bs_full <- bs_sentiment %>%
  mutate(
    album_name = ifelse(
      album_name == "We Shall Overcome: The Seeger Sessions (American Land Edition)",
      "We Shall Overcome: The Seeger Sessions",
      album_name
    )
  ) %>%
  stringdist_join(
    albums,
    by = c(album_name = "album_name"),
    max_dist = 2,
    distance_col = "distance",
    ignore_case = TRUE
  ) %>%
  # Keep the left-hand spelling of the album name only.
  rename(album_name = album_name.x) %>%
  select(-album_name.y)
# Subjective list of best Bruce Springsteen songs ------------------------------
artist_url <- "https://www.nj.com/entertainment/music/2017/10/bruce_springsteen_songs_ranked_springsteen_on_broa.html"

# Scrape the ranked list. Each kept paragraph starts with the rank number;
# the text after it is split on "-" into a track part and an album part.
# NOTE(review): the exact paragraph layout on the page is assumed from the
# parsing steps below -- confirm against the live page.
track_album_list <-
  read_html(artist_url) %>%
  html_nodes(".article__paragraph--left") %>%
  html_text() %>%
  tibble(x = .) %>%
  filter(str_detect(x, "^\\d+")) %>%
  transmute(
    rank = str_extract(x, "^\\d+"),
    # Strip only the leading rank, its optional "." or ")" and following
    # whitespace. The pattern is anchored so digits inside a title
    # (e.g. "57th") are preserved.
    track_name = str_remove(x, "^\\d+[.)]?\\s*"),
    track_name = str_remove_all(track_name, '"'),
    album_name_clean = str_split(track_name, "-", simplify = TRUE)[, 2],
    track_name_clean = str_split(track_name, "-", simplify = TRUE)[, 1],
    # Album key: lower-case, trimmed, spaces replaced by underscores.
    album_name = str_replace_all(
      str_trim(str_to_lower(album_name_clean), "both"), " ", "_"
    ),
    # Track key: lower-case, trimmed, no spaces, no punctuation.
    track_name = str_replace_all(
      str_trim(str_to_lower(track_name_clean), "both"), " ", ""
    ),
    track_name = str_remove_all(track_name, "[:punct:]")
  )
# For each album in the ranked list keep its best-ranked track(s), then attach
# that track's display name to the full data set. right_join() keeps every
# row of `bs_full`; with no `by`, the join uses the columns the tables share.
bs_full2 <- track_album_list %>%
  mutate(rank = as.numeric(rank)) %>%
  group_by(album_name_clean) %>%
  slice_min(rank, n = 1) %>%
  ungroup() %>%
  distinct(track_name_clean, track_name) %>%
  right_join(bs_full)
# Plot prep --------------------------------------------------------------------
# Quadrant labels for the valence/energy plane. They carry a single
# `album_name`, so they are drawn only in the facet with that exact label.
vibe_df <- tibble(
  vibe = c("Turbulent/Angry", "Happy/Joyful", "Sad/Depressing", "Chill/Peaceful"),
  energy = c(0.6, 0.6, 0.4, 0.4),
  valence = c(0.15, 0.87, 0.15, 0.87),
  album_name = "1973 - Greetings from Asbury Park, N.J."
)

# One row per track: average the VADER compound score over the track's rows,
# then keep a single row per track title.
bs_p <- bs_full2 %>%
  mutate(
    ave_vader = as.numeric(compound),
    track_number = as.numeric(track_number),
    ave_sentiment = as.numeric(ave_sentiment),
    track_title = str_to_title(glue("{track_number} - {track_name_original}")),
    # Facet label, e.g. "1973 - Greetings from Asbury Park, N.J.".
    album_name = glue("{album_release_year} - {album_name}"),
    track_title =
      reorder_within(track_title, -track_number, within = album_name),
    # Despite the name, from here on this column holds minutes.
    duration_ms = duration_ms / 60000
  ) %>%
  group_by(
    track_title, energy, valence, album_name, duration_ms, ave_sentiment,
    danceability, track_name_clean
  ) %>%
  # `.groups = "drop"` returns an ungrouped result with one row per group, so
  # no further ungroup()/distinct() is needed.
  summarise(ave_vader = mean(ave_vader, na.rm = TRUE), .groups = "drop") %>%
  # Tracks with no usable score (mean of nothing is NaN) are shown as 0.
  mutate(ave_vader = ifelse(is.na(ave_vader), 0, ave_vader)) %>%
  # If a title still has several rows, keep the one with the highest energy.
  group_by(track_title) %>%
  slice_max(order_by = energy, n = 1, with_ties = FALSE) %>%
  ungroup()
# Plot -------------------------------------------------------------------------
# Valence vs. energy scatter, one facet per album. Point size is the song
# length in minutes and colour is the average VADER score.
bs_p1 <- bs_p %>%
  ggplot() +
  # Reference lines are constants, so they are set as parameters rather than
  # mapped through aes(); mapping them would redraw the line for every row.
  geom_vline(xintercept = 0.5) +
  geom_hline(yintercept = 0.5) +
  geom_point(
    aes(y = energy, x = valence, size = duration_ms, color = ave_vader),
    alpha = 0.75
  ) +
  # Quadrant labels come from their own data frame.
  geom_text(
    aes(y = energy, x = valence, label = vibe),
    data = vibe_df, size = 3.5,
    color = "#460069"
  ) +
  # Optional track labels, currently disabled:
  # geom_text(
  #   aes(y = energy, x = valence, label = track_name_clean),
  #   show.legend = FALSE, na.rm = TRUE
  # ) +
  facet_wrap(
    ~album_name,
    labeller = label_wrap_gen(width = 45, multi_line = TRUE)
  ) +
  theme_alex() +
  # Plot-specific overrides on top of the shared theme.
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#cbcbcb"
    ),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5),
    legend.background = element_blank(),
    legend.title = element_text(
      color = "#460069"
    ),
    legend.key = element_blank(),
    legend.text = element_text(
      color = "#460069"
    ),
    axis.title.x = element_text(hjust = 1, margin = margin(5, 0, 0, 0)),
    axis.title.y = element_text(hjust = 1, margin = margin(0, 15, 0, 0))
  ) +
  scale_color_viridis_c(
    option = "magma"
  ) +
  scale_size_continuous(range = c(0, 8)) +
  labs(
    x = "Valence",
    y = "Energy",
    title = "The music of Bruce Springsteen",
    subtitle = "Valence describes the musical positiveness conveyed by a track.\nEnergy represents a perceptual measure of intensity and activity.\nVADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool.\nA high sentiment score indicates positive lyrics.",
    caption = "Source:\nGenius.com\nSpotify.com",
    color = "VADER lyric\nsentiment\nscore:",
    size = "Song length\n(minutes):"
  )

bs_p1
@farach
Copy link
Author

farach commented Sep 13, 2021

Made public

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment