Last active
September 6, 2021 14:28
-
-
Save farach/274ccfa6f18c441bc9468f6e9793a4c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(tidytext) | |
library(vader) | |
library(glue) | |
#' Purple-accented plot theme layered on top of `theme_minimal()`.
#'
#' Sets Arial as the font family for titles, axis text and legend text,
#' colours most text "#460069", removes axis ticks, and blanks the legend
#' title, background and keys.
#'
#' @return A ggplot2 theme object; add it to a plot with `+`.
theme_alex <- function() {
  font <- "Arial"
  accent <- "#460069"

  # The base theme has to be combined with `+`. Written as a bare statement
  # its value is thrown away and only the overrides below are returned.
  theme_minimal() +
    theme(
      strip.text = element_text(
        hjust = 0,
        color = accent,
        size = 12
      ),
      axis.ticks = element_blank(),
      plot.title = element_text(
        family = font,
        size = 20,
        face = "bold",
        color = accent
      ),
      plot.subtitle = element_text(
        family = font,
        size = 14,
        color = "#6a1c91",
        hjust = 0.5
      ),
      plot.caption = element_text(
        family = font,
        size = 9,
        hjust = 1,
        color = accent
      ),
      axis.title = element_text(
        family = font,
        size = 10,
        color = accent
      ),
      axis.text = element_text(
        family = font,
        size = 9,
        color = accent
      ),
      # Extra space above (5) and below (10) the x-axis labels.
      axis.text.x = element_text(
        margin = margin(t = 5, b = 10)
      ),
      legend.text.align = 0,
      legend.background = element_blank(),
      legend.title = element_blank(),
      legend.key = element_blank(),
      legend.text = element_text(
        family = font,
        size = 18,
        color = "#4B636E"
      )
    )
}
# Read an .rds file from a URL and always close the connection afterwards.
# Passing `url(..., "rb")` straight to readRDS() leaves the opened
# connection dangling, because readRDS() does not close a connection it
# did not open itself.
read_remote_rds <- function(rds_url) {
  con <- url(rds_url, "rb")
  on.exit(close(con), add = TRUE)
  readRDS(con)
}

# Source data: Spotify track metadata and Genius lyrics.
joni_spotify <- read_remote_rds(
  "https://github.com/farach/data/blob/master/joni_spotify.rds?raw=true"
)
joni_genius_df <- read_remote_rds(
  "https://github.com/farach/data/blob/master/joni_genius_df.rds?raw=true"
)
# Genius lyrics, one row per track: drop three excluded albums and rows
# without lyric text, glue each track's lyric rows into a single string,
# then normalise the title (strip a trailing "(...)" part, trim, lower-case)
# so it can be matched against the Spotify titles.
joni_genius_clean <- joni_genius_df %>%
  filter(!is.na(lyric)) %>%
  filter(
    !album_name %in% c("Shadows and Light", "Travelogue", "Both Sides Now")
  ) %>%
  group_by(album_name, track_n, artist, track_title) %>%
  summarise(lyric = glue_collapse(lyric, sep = " ")) %>%
  ungroup() %>%
  mutate(
    track_title = track_title %>%
      str_remove("\\s*\\([^\\)]+\\)\\s*$") %>%
      str_trim() %>%
      tolower()
  )
# Spotify track data with a normalised `track_title` join key: the trailing
# "(...)" part of `track_name` is removed, the result is trimmed and
# lower-cased, and a " - live" suffix is dropped. Excluded albums and the
# spoken "(Rap)" tracks are then filtered out.
joni_spotify_clean <- joni_spotify %>%
  mutate(
    track_title = tolower(
      str_trim(str_remove(track_name, "\\s*\\([^\\)]+\\)\\s*$"), side = "both")
    ),
    track_title = str_remove(track_title, " - live")
  ) %>%
  filter(
    !album_name %in% c(
      "Shadows and Light", "Travelogue", "Both Sides Now",
      "Shine [Standard Jewel - Parts Order Only]"
    ),
    # Compare against the raw Spotify name, ignoring case. The normalised
    # `track_title` is lower-case and has lost its "(Rap)" suffix, so testing
    # it against these names could never exclude anything.
    !tolower(track_name) %in% tolower(c(
      "Coin In The Pocket (Rap)", "Funeral (Rap)",
      "Happy Birthday 1975 (Rap)", "I's A Muggin' (Rap)",
      "Lucky (Rap)"
    ))
  )
# Attach the Genius lyrics to the Spotify tracks. No `by` is given, so the
# join uses every column name the two tables share.
# NOTE(review): presumably that is only `track_title` -- confirm.
# Genius rows with no Spotify match come back with a missing album name and
# are dropped; duplicate rows are collapsed.
joni_full_clean <- joni_spotify_clean %>%
  right_join(select(joni_genius_clean, -album_name)) %>%
  select(
    album_name,
    track_title,
    lyric,
    track_number,
    valence,
    album_release_year
  ) %>%
  filter(!is.na(album_name)) %>%
  distinct()
# Score every lyric with get_vader() and spread each result into its own
# columns (the `compound` column is used further down). The title is also
# stripped of any trailing "(...)" part and trimmed once more.
joni_vader <- joni_full_clean %>%
  mutate(vader = map(lyric, get_vader)) %>%
  mutate(
    track_title = track_title %>%
      str_remove("\\s*\\([^\\)]+\\)\\s*$") %>%
      str_trim()
  ) %>%
  unnest_wider(vader)
# Plot-ready table: numeric track number and compound score, titles
# labelled "<track number> - <Title>" and ordered by descending track number
# within each album, and album names prefixed with their release year.
joni_p <- joni_vader %>%
  mutate(across(c(track_number, compound), as.numeric)) %>%
  mutate(
    # reorder_within() must see the original album name, so the album label
    # is rewritten only after the titles have been reordered.
    track_title = reorder_within(
      str_to_title(glue("{track_number} - {track_title}")),
      -track_number,
      within = album_name
    ),
    album_name = paste(album_release_year, "-", album_name)
  ) %>%
  distinct(track_title, compound, valence, album_name)
# Dumbbell-style chart, one facet per album: for every track, a dashed
# segment joins the rescaled Spotify valence to the VADER compound score.
joni_p_final <- joni_p %>%
  mutate(
    # Stretch valence over the observed data range onto [-1, 1].
    valence_scaled = scales::rescale(valence, to = c(-1, 1)),
    # Constant label columns; mapping them to colour builds the legend.
    valence_1 = "Spotify valence score",
    compound_1 = "VADER compound score"
  ) %>%
  # Where a title appears more than once, keep its highest-valence row(s).
  group_by(track_title) %>%
  filter(valence == max(valence)) %>%
  ungroup() %>%
  ggplot(aes(x = track_title, y = compound)) +
  geom_segment(
    aes(
      xend = track_title,
      y = valence_scaled,
      yend = compound
    ),
    linetype = "dashed",
    alpha = 0.75
  ) +
  geom_point(aes(color = compound_1), size = 2) +
  geom_point(aes(y = valence_scaled, color = valence_1), size = 2) +
  facet_wrap(~album_name, scales = "free_y") +
  theme_alex() +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#cbcbcb"
    ),
    panel.grid.major.y = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  # Removes the suffix reorder_within() appended to the track labels.
  scale_x_reordered() +
  # Colours are named by legend label so the pairing does not depend on the
  # alphabetical order of the labels.
  scale_color_manual(
    values = c(
      "Spotify valence score" = "steelblue",
      "VADER compound score" = "red"
    )
  ) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Normalized valence and vader score",
    title = "VADER sentiment vs. Spotify valence score of Joni Mitchell songs by album",
    subtitle = "Valence describes the musical positiveness conveyed by a track. Tracks with high valence sound more\npositive (happy, cheerful, euphoric), while tracks with low valence sound more negative (sad, depressed, angry).\nVADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis\ntool that is specifically attuned to sentiments expressed in social media, and works well on texts from other domains.",
    caption = "Source:\nGenius.com\nSpotify.com"
  ) +
  # Enlarge the legend keys; the plotted points keep size 2.
  guides(color = guide_legend(override.aes = list(size = 5)))
# Write the finished chart to a 6000 x 4000 pixel PNG in the working
# directory.
ggsave(
  filename = "./joni_p_final.png",
  plot = joni_p_final,
  units = "px",
  width = 6000,
  height = 4000
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment