Last active
June 1, 2024 12:04
-
-
Save Aurorum/32b3fc716ef1a93746debb499623829b to your computer and use it in GitHub Desktop.
How many times does Taylor Swift sing "love" in her back catalog?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * For The Spectator internship application.
 * - Counts the exact, whole-word occurrences of "love" in the lyrics of Taylor Swift's studio albums.
 *
 * I decided to scrape the lyrics rather than use an API so that the script is easy to run
 * without any hassle around API keys. You can simply copy and paste the script below into
 * Google Apps Script and execute it.
 *
 * We can also change the regex in countLoveOccurrences() to include words like "loved", "loving", etc.
 *
 * Final count: 491 occurrences of "love".
 */
/**
 * Entry point: counts whole-word, case-insensitive occurrences of "love" in
 * the lyrics of the studio albums listed below and logs the total.
 *
 * @return {number} Total number of matches across all lyrics that were retrieved.
 */
function countLoveOccurrences() {
	// We first manually set the studio albums that we want to find lyrics for.
	// This ensures we don't include things like her live albums, special releases, remixes, or covers.
	// It also means we avoid counting the Taylor's Version songs twice.
	const albumSlugs = [
		// Slugs are for genius.com.
		'Taylor-swift', // Debut
		'Fearless-taylor-s-version', // Fearless TV
		'Speak-now-taylors-version', // Speak Now TV
		'Red-taylors-version', // Red TV
		'1989-taylors-version-deluxe', // 1989 TV deluxe
		'Reputation', // reputation
		'Lover', // Lover
		'Folklore-deluxe-version', // folklore deluxe
		'Evermore-deluxe-version', // evermore deluxe
		'Midnights-3am-edition', // Midnights 3am edition
		'The-tortured-poets-department-the-anthology', // Tortured Poets anthology
	];

	// Retrieve an array of lyrics. Each song is a separate item.
	const allLyrics = findLyricsForAlbum( albumSlugs );

	// \b keeps the match to the exact word, so "lovely" or "gloves" are not counted.
	const regex = /\blove\b/gi;
	let count = 0;
	allLyrics.forEach( ( item ) => {
		// A song whose page could not be fetched may be represented by null
		// rather than a string; skip it instead of throwing on item.match.
		if ( typeof item !== 'string' ) {
			return;
		}
		const matches = item.match( regex );
		if ( matches ) {
			count += matches.length;
		}
	} );

	console.log( "Number of occurrences of 'love': " + count );
	return count;
}
/**
 * Collects the lyrics of every song linked from the given Genius album pages.
 *
 * @param {string[]} albumSlugs Album slugs appended to https://genius.com/albums/Taylor-swift/.
 * @return {string[]} One lyrics string per song that was fetched successfully.
 */
function findLyricsForAlbum( albumSlugs ) {
	// A Set keeps first-seen order and guarantees each lyrics page is fetched
	// (and therefore counted) only once, even if a link is collected twice.
	const songLinks = new Set();
	albumSlugs.forEach( ( slug ) => {
		const links = fetchSongsFromAlbum( 'https://genius.com/albums/Taylor-swift/' + slug );
		// A null result means the album page could not be fetched.
		if ( links ) {
			links.forEach( ( link ) => songLinks.add( link ) );
		}
	} );

	const songLyrics = [];
	songLinks.forEach( ( song ) => {
		const lyrics = fetchIndividualSongLyrics( song );
		// Failed fetches yield null; leave them out so callers only see strings.
		if ( lyrics === null || lyrics === undefined ) {
			return;
		}
		songLyrics.push( lyrics );
		// Log lyrics to demonstrate script is working.
		console.log( lyrics );
	} );

	return songLyrics;
}
/**
 * Fetches a Genius album page and extracts the links to its song lyrics pages.
 *
 * @param {string} url Full URL of the album page.
 * @return {string[]|null} Links to lyrics pages found in the track list, or
 *                         null when the page does not respond with HTTP 200.
 */
function fetchSongsFromAlbum( url ) {
	// muteHttpExceptions makes fetch return the response for error statuses
	// instead of throwing, so the failure branch below is actually reachable.
	const response = UrlFetchApp.fetch( url, { muteHttpExceptions: true } );
	const status = response.getResponseCode();
	if ( status !== 200 ) {
		console.log( 'Failed to fetch album. Error code: ' + status );
		return null;
	}

	const page = response.getContentText();

	// Strip content from outside the track list. indexOf returns -1 for a
	// missing marker; passing that to substring would silently select the
	// wrong part of the page, so fall back to the page start / page end.
	const startIndex = page.indexOf( 'column_layout-column_span--primary' );
	const from = startIndex === -1 ? 0 : startIndex;
	const endIndex = page.indexOf( 'breadcrumbs', from );
	const to = endIndex === -1 ? page.length : endIndex;
	const trackList = page.substring( from, to );

	// Gather links to lyrics pages on the album page.
	// Group 1 is the quote character, group 2 the href value.
	const regex = /<a\s+(?:[^>]*?\s+)?href=(["'])(.*?)\1/g;
	const songLinks = [];
	let match;
	while ( ( match = regex.exec( trackList ) ) !== null ) {
		const link = match[ 2 ];
		if ( link.includes( 'genius.com/Taylor-swift' ) && link.includes( '-lyrics' ) ) {
			songLinks.push( link );
		}
	}

	return songLinks;
}
/**
 * Fetches a Genius song page and returns its lyrics as plain text.
 *
 * @param {string} url Full URL of the song's lyrics page.
 * @return {string|null} The lyrics with HTML removed and entities decoded;
 *                       an empty string when no lyrics section can be located;
 *                       null when the page does not respond with HTTP 200.
 */
function fetchIndividualSongLyrics( url ) {
	// muteHttpExceptions makes fetch return the response for error statuses
	// instead of throwing, so the failure branch below is actually reachable.
	const response = UrlFetchApp.fetch( url, { muteHttpExceptions: true } );
	const status = response.getResponseCode();
	if ( status !== 200 ) {
		console.log( 'Failed to fetch the page. Error code: ' + status + ' (' + url + ')' );
		return null;
	}

	const content = response.getContentText();

	// Find the lyrics section on the Genius page. If either marker is missing,
	// any slice of the page would be arbitrary text, so report no lyrics.
	const startIndex = content.indexOf( 'data-lyrics-container' );
	const endIndex = startIndex === -1 ? -1 : content.indexOf( 'LyricsFooter__Container', startIndex );
	if ( endIndex === -1 ) {
		console.log( 'Could not locate the lyrics on the page: ' + url );
		return '';
	}
	let lyrics = content.substring( startIndex, endIndex );

	// Replace HTML line breaks (<br>, <br/>, <br />) with a new line.
	lyrics = lyrics.replace( /<br\s*\/?>/gi, '\n' );
	// A closing div ends a block of text; keep a line break there so the last
	// word of one block is not fused with the first word of the next.
	lyrics = lyrics.replace( /<\/div>/gi, '\n' );
	// Strip the remaining complete HTML tags so we just have the lyrics.
	lyrics = lyrics.replace( /<[^>]+>/g, '' );
	// The slice starts and ends in the middle of a tag; remove those leftover
	// partial tags (everything up to the first ">" and from the last "<").
	lyrics = lyrics.replace( /[^>]*>([^<]*)<[^>]*/g, '$1' );

	// Decode entities.
	return decodeEntities( lyrics );
}
/**
 * Decodes character entities (e.g. "&amp;") by parsing the text as the
 * content of a throwaway XML element and reading its text back.
 *
 * @param {string} string Text that may contain entities.
 * @return {string} The decoded text; an empty string for empty or non-string
 *                  input; the input unchanged if it cannot be parsed as XML.
 */
function decodeEntities( string ) {
	// Concatenating null/undefined would otherwise decode to "null"/"undefined".
	if ( typeof string !== 'string' || string === '' ) {
		return '';
	}
	try {
		return XmlService.parse( '<d>' + string + '</d>' )
			.getRootElement()
			.getText();
	} catch ( error ) {
		// The parser rejects text that is not well-formed XML (for example a
		// bare "&"). Keep the raw text rather than aborting the whole run.
		console.log( 'Could not decode entities; using the raw text. ' + error );
		return string;
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment