Skip to content

Instantly share code, notes, and snippets.

@farach
Last active September 6, 2021 14:28
Show Gist options
  • Save farach/274ccfa6f18c441bc9468f6e9793a4c1 to your computer and use it in GitHub Desktop.
Save farach/274ccfa6f18c441bc9468f6e9793a4c1 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(tidytext)
library(vader)
library(glue)
#' Purple-accented minimal ggplot2 theme
#'
#' Builds on `theme_minimal()` and overrides text, legend and axis elements
#' with Arial type in a purple palette.
#'
#' @return A complete ggplot2 theme object that can be added to a plot
#'   with `+`.
theme_alex <- function() {
  font <- "Arial"

  # The `+` matters: a bare `theme_minimal()` statement is evaluated and then
  # discarded, which would leave only the partial `theme()` below as the
  # return value.
  theme_minimal() +
    theme(
      strip.text = element_text(
        hjust = 0,
        color = "#460069",
        size = 12
      ),
      axis.ticks = element_blank(),
      plot.title = element_text(
        family = font,
        size = 20,
        face = "bold",
        color = "#460069"
      ),
      plot.subtitle = element_text(
        family = font,
        size = 14,
        color = "#6a1c91",
        hjust = 0.5
      ),
      plot.caption = element_text(
        family = font,
        size = 9,
        hjust = 1,
        color = "#460069"
      ),
      axis.title = element_text(
        family = font,
        size = 10,
        color = "#460069"
      ),
      axis.text = element_text(
        family = font,
        size = 9,
        color = "#460069"
      ),
      axis.text.x = element_text(
        margin = margin(5, b = 10)
      ),
      legend.background = element_blank(),
      legend.title = element_blank(),
      legend.key = element_blank(),
      # `hjust = 0` left-aligns legend labels; it replaces the deprecated
      # `legend.text.align = 0` theme argument.
      legend.text = element_text(
        family = font,
        size = 18,
        color = "#4B636E",
        hjust = 0
      )
    )
}
#' Read an RDS file from a URL and close the connection afterwards
#'
#' `readRDS()` does not close a connection that was opened by the caller, so
#' the connection is closed here explicitly once the read finishes or fails.
#'
#' @param rds_url Character scalar, URL of the `.rds` file.
#' @return The deserialised R object.
read_remote_rds <- function(rds_url) {
  con <- url(rds_url, open = "rb")
  on.exit(close(con), add = TRUE)
  readRDS(con)
}

# Spotify track data and Genius lyric data, both downloaded from GitHub.
joni_spotify <- read_remote_rds(
  "https://github.com/farach/data/blob/master/joni_spotify.rds?raw=true"
)
joni_genius_df <- read_remote_rds(
  "https://github.com/farach/data/blob/master/joni_genius_df.rds?raw=true"
)
# Genius lyrics, one row per (album_name, track_n, artist, track_title):
# rows with a missing lyric and three excluded albums are dropped, the
# remaining lyric values of each track are joined with single spaces, and the
# title is normalised (trailing "(...)" suffix removed, trimmed, lower-cased).
joni_genius_clean <- joni_genius_df %>%
  filter(!is.na(lyric)) %>%
  filter(
    !album_name %in% c("Shadows and Light", "Travelogue", "Both Sides Now")
  ) %>%
  group_by(album_name, track_n, artist, track_title) %>%
  summarise(lyric = glue_collapse(lyric, sep = " ")) %>%
  ungroup() %>%
  mutate(
    track_title = track_title %>%
      str_remove("\\s*\\([^\\)]+\\)\\s*$") %>%
      str_trim(side = "both") %>%
      tolower()
  )
# Spotify tracks with a normalised `track_title` join key (trailing "(...)"
# suffix removed, trimmed, lower-cased, " - live" removed), minus the
# excluded albums and the spoken "(Rap)" tracks.
joni_spotify_clean <- joni_spotify %>%
  mutate(
    track_title = tolower(
      str_trim(str_remove(track_name, "\\s*\\([^\\)]+\\)\\s*$"), side = "both")
    ),
    track_title = str_remove(track_title, " - live")
  ) %>%
  filter(
    !album_name %in% c(
      "Shadows and Light", "Travelogue", "Both Sides Now",
      "Shine [Standard Jewel - Parts Order Only]"
    ),
    # Compare against the raw `track_name`: `track_title` has already been
    # lower-cased and stripped of its trailing "(Rap)", so it can never equal
    # any of these names and the exclusion would silently do nothing.
    !tolower(track_name) %in% tolower(c(
      "Coin In The Pocket (Rap)", "Funeral (Rap)",
      "Happy Birthday 1975 (Rap)", "I's A Muggin' (Rap)",
      "Lucky (Rap)"
    ))
  )
# Attach Spotify attributes to every Genius track. No `by` is given, so the
# join uses all columns the two tables share; the Genius `album_name` is
# removed first so the Spotify one is the only one present. Rows whose
# `album_name` is missing after the join are discarded.
joni_full_clean <- right_join(
  joni_spotify_clean,
  select(joni_genius_clean, -album_name)
) %>%
  select(
    album_name, track_title, lyric,
    track_number, valence, album_release_year
  ) %>%
  distinct() %>%
  drop_na(album_name)
# Score each lyric with `get_vader()` and spread every element of the result
# into its own column; the title gets one more pass of the trailing "(...)"
# clean-up.
joni_vader <- joni_full_clean %>%
  mutate(
    track_title = track_title %>%
      str_remove("\\s*\\([^\\)]+\\)\\s*$") %>%
      str_trim(side = "both"),
    vader = map(lyric, get_vader)
  ) %>%
  unnest_wider(vader)
# Plot-ready table: numeric track number and compound score, a
# "<number> - <Title>" label ordered by descending track number within each
# album, and an album label prefixed with its release year.
joni_p <- joni_vader %>%
  mutate(
    track_number = as.numeric(track_number),
    compound = as.numeric(compound)
  ) %>%
  mutate(
    track_title = str_to_title(glue("{track_number} - {track_title}")),
    # Ordering uses the original `album_name`; it is relabelled just below.
    track_title = reorder_within(
      track_title,
      -track_number,
      within = album_name
    ),
    album_name = paste(album_release_year, "-", album_name)
  ) %>%
  distinct(track_title, compound, valence, album_name)
# Dumbbell chart per album: for every track, one point for the VADER compound
# score and one for the Spotify valence rescaled to [-1, 1], joined by a
# dashed segment.
joni_p_final <- joni_p %>%
  mutate(
    valence_scaled = scales::rescale(valence, to = c(-1, 1)),
    # Constant columns that serve only as legend labels for the two point
    # layers.
    valence_1 = "Spotify valence score",
    compound_1 = "VADER compound score"
  ) %>%
  # Where a track label occurs more than once, keep the row(s) with the
  # highest valence.
  group_by(track_title) %>%
  filter(valence == max(valence)) %>%
  ungroup() %>%
  ggplot(aes(x = track_title, y = compound)) +
  geom_segment(
    aes(
      x = track_title,
      xend = track_title,
      y = valence_scaled,
      yend = compound
    ),
    linetype = "dashed",
    alpha = 0.75
  ) +
  geom_point(aes(color = compound_1), size = 2) +
  geom_point(aes(y = valence_scaled, color = valence_1), size = 2) +
  facet_wrap(~album_name, scales = "free_y") +
  theme_alex() +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#cbcbcb"
    ),
    panel.grid.major.y = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  # Strips the suffix that reorder_within() appended to the track labels.
  scale_x_reordered() +
  # Named values tie each colour to its label instead of relying on the
  # alphabetical order of the labels.
  scale_color_manual(
    values = c(
      "Spotify valence score" = "steelblue",
      "VADER compound score" = "red"
    )
  ) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Normalized valence and vader score",
    title = paste0(
      "VADER sentiment vs. Spotify valence score of ",
      "Joni Mitchell songs by album"
    ),
    subtitle = paste0(
      "Valence describes the musical positiveness conveyed by a track. ",
      "Tracks with high valence sound more\n",
      "positive (happy, cheerful, euphoric), while tracks with low valence ",
      "sound more negative (sad, depressed, angry).\n",
      "VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon ",
      "and rule-based sentiment analysis\n",
      "tool that is specifically attuned to sentiments expressed in social ",
      "media, and works well on texts from other domains."
    ),
    caption = "Source:\nGenius.com\nSpotify.com"
  ) +
  guides(color = guide_legend(override.aes = list(size = 5)))
# Write the finished chart to disk as a 6000 x 4000 pixel PNG.
ggsave(
  filename = "./joni_p_final.png",
  plot = joni_p_final,
  width = 6000,
  height = 4000,
  units = "px"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment