Skip to content

Instantly share code, notes, and snippets.

@shijithpk
Created January 24, 2024 20:49
Show Gist options
  • Save shijithpk/19002c9918d06db637e4786c574658df to your computer and use it in GitHub Desktop.
Save shijithpk/19002c9918d06db637e4786c574658df to your computer and use it in GitHub Desktop.
Script for updating Spotify playlists of various BBC music shows
# contents of bbc_update_urls.csv
# show,episode_url,episode_date
# radio2_folk_show,https://www.bbc.co.uk/sounds/play/m001v4yp,2024-01-17
# radio6_new_music,https://www.bbc.co.uk/sounds/play/m001v50p,2024-01-19
# radio3_music_planet,https://www.bbc.co.uk/sounds/play/m001v3yn,2024-01-13
# radio1_best_new_pop,https://www.bbc.co.uk/sounds/play/m001v4rp,2024-01-19
# radio1_rock_show,https://www.bbc.co.uk/sounds/play/m001v2mp,2024-01-15
# radio6_the_morning_after_mix,https://www.bbc.co.uk/sounds/play/m001v4jv,2024-01-14
# radio1_the_chillest_show,https://www.bbc.co.uk/sounds/play/m001v2r0,2024-01-14
# radio6_gilles_peterson_show,https://www.bbc.co.uk/sounds/play/m001v4hp,2024-01-13
#below is the actual script
import requests
import re
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import cred
import time
import pandas as pd
from lxml import html
import json
from datetime import datetime
import random
# Sleep for a random 0-180 seconds before touching any service.
# NOTE(review): presumably jitter so scheduled runs do not always start at the
# same instant - confirm with the author.
delay = random.randint(0, 180)
time.sleep(delay)

# Spotify permissions requested for the playlist reads and edits below.
scope = "playlist-read-private playlist-modify-private playlist-modify-public user-library-read"

# Credentials come from the local `cred` module; open_browser=False keeps
# spotipy from launching a browser during the OAuth flow.
auth_manager = SpotifyOAuth(
    client_id=cred.client_id,
    client_secret=cred.client_secret,
    redirect_uri=cred.redirect_url,
    scope=scope,
    open_browser=False,
)
sp = spotipy.Spotify(auth_manager=auth_manager)
# One entry per BBC show, keyed by the name used in the `show` column of
# bbc_update_urls.csv.
#   url_part    - appended to https://www.bbc.co.uk/sounds/brand/ to reach the
#                 show's episode listing
#   playlist_id - id of the Spotify playlist that receives the show's tracks
# Shows are processed in the order they are listed here.
show_dict = {
    'radio1_best_new_pop': {
        'url_part': 'b07zc3js',
        'playlist_id': '3t7laH7XOcbCFN1oItqdHo',
    },
    'radio2_folk_show': {
        'url_part': 'b01phglj',
        'playlist_id': '5DjFGSMemnQlto1iopbULA',
    },
    'radio3_music_planet': {
        'url_part': 'b09ymx3v',
        'playlist_id': '197QzBV7LFn6o4rFbUe1kJ',
    },
    'radio1_rock_show': {
        'url_part': 'b006wq4s',
        'playlist_id': '2rjHoY4rckW70ChgsK1JUc',
    },
    'radio6_the_morning_after_mix': {
        'url_part': 'p071z8z5',
        'playlist_id': '3OFAXHgkjxsJ1tGBBzKWCt',
    },
    'radio1_the_chillest_show': {
        'url_part': 'b03hjfww',
        'playlist_id': '4x1Qroq1N7F4mwsfKC2oPJ',
    },
    'radio6_gilles_peterson_show': {
        'url_part': 'b01fm4ss',
        'playlist_id': '2hh8x1bPsgPpxwhIV0muAn',
    },
}
# Spotify's "add items to playlist" endpoint accepts at most 100 items per call.
_SPOTIFY_ADD_BATCH_SIZE = 100
# Seconds to wait on a BBC page before giving up instead of hanging forever.
_BBC_REQUEST_TIMEOUT = 30
# Compiled once; both patterns are applied with .match(), i.e. anchored at the
# start of the text they are given.
_PRELOADED_STATE_PATTERN = re.compile(r" window.__PRELOADED_STATE__ = (.*); ")
_SPOTIFY_TRACK_PATTERN = re.compile(r"https://open.spotify.com/track/(.*)")


def _spotify_track_id(track):
    """Return the Spotify track id linked from one tracklist entry, or None.

    Every entry under the track's 'uris' is inspected (not only the first),
    and the first one whose 'uri' is an open.spotify.com track link wins.
    Entries with a missing or unexpected shape are ignored.
    """
    uri_entries = track.get('uris') if isinstance(track, dict) else None
    if not isinstance(uri_entries, list):
        return None
    for entry in uri_entries:
        uri = entry.get('uri') if isinstance(entry, dict) else None
        if not isinstance(uri, str):
            continue
        uri_match = _SPOTIFY_TRACK_PATTERN.match(uri)
        if uri_match:
            return uri_match.group(1)
    return None


def _episode_spotify_track_ids(episode_tree):
    """Return the Spotify track ids found on a parsed BBC Sounds episode page.

    The ids are read from the JSON assigned to window.__PRELOADED_STATE__ in
    the page's inline script, in tracklist order. An empty list is returned
    when the script is absent, the assignment cannot be located, or the JSON
    does not have the expected tracklist -> tracks structure.
    """
    script_texts = episode_tree.xpath(
        "//script[contains(text(),'commercial-music-service-spotify')]/text()"
    )
    if not script_texts:
        return []
    state_match = _PRELOADED_STATE_PATTERN.match(script_texts[0])
    if state_match is None:
        return []
    try:
        tracklist = json.loads(state_match.group(1))['tracklist']['tracks']
    except (json.JSONDecodeError, KeyError, TypeError):
        return []
    if not isinstance(tracklist, list):
        return []
    track_ids = (_spotify_track_id(track) for track in tracklist)
    return [track_id for track_id in track_ids if track_id is not None]


# One row per show: the newest episode already pushed to its playlist.
bbc_update_urls_df = pd.read_csv('bbc_update_urls.csv')

for show in show_dict:
    show_rows = bbc_update_urls_df['show'] == show
    if not show_rows.any():
        # Without a baseline date we cannot tell which episodes are new.
        print('No row for {} in bbc_update_urls.csv; skipping'.format(show))
        continue
    last_week_episode_date_raw = bbc_update_urls_df.loc[show_rows, 'episode_date'].values[0]
    last_week_episode_date_object = datetime.strptime(last_week_episode_date_raw, '%Y-%m-%d')
    playlist_id = show_dict[show]['playlist_id']

    # Count the tracks in the playlist by walking every page of results.
    results = sp.playlist_items(playlist_id, offset=0, market='IN')
    items = results['items']
    while results['next']:
        time.sleep(5)
        results = sp.next(results)
        items.extend(results['items'])

    # Once the playlist holds 9960 or more tracks, drop the last 40. New tracks
    # are inserted at the top, so the oldest ones sit at the bottom.
    if len(items) >= 9960:
        last_40_ids = []
        for item in items[-40:]:
            # 'track' can be null for entries Spotify can no longer resolve,
            # and such entries have no id that could be removed.
            track_idx = (item.get('track') or {}).get('id')
            if track_idx:
                last_40_ids.append(track_idx)
        if last_40_ids:
            sp.playlist_remove_all_occurrences_of_items(playlist_id, last_40_ids)

    track_spotify_id_list = []
    show_url = 'https://www.bbc.co.uk/sounds/brand/' + show_dict[show]['url_part']
    page = requests.get(show_url, timeout=_BBC_REQUEST_TIMEOUT)
    tree = html.fromstring(page.content)
    list_of_episodes = tree.xpath("//a[contains(@class,'sc-c-playable-list-card__link')]")

    new_date_object_list = []
    new_episode_url_list = []
    for episode in list_of_episodes:
        episode_url = 'https://www.bbc.co.uk' + episode.xpath("./@href")[0]
        episode_date_raw = episode.xpath(".//li[contains(@aria-label,'release date')]/text()")[0]
        episode_date_object = datetime.strptime(episode_date_raw, '%d %b %Y')
        if episode_date_object <= last_week_episode_date_object:
            continue
        # NOTE(review): the episode is recorded as seen even when its page
        # yields no tracks, so a tracklist published later is never picked up.
        # This matches the original behaviour - confirm it is intended.
        new_date_object_list.append(episode_date_object)
        new_episode_url_list.append(episode_url)
        page2 = requests.get(episode_url, timeout=_BBC_REQUEST_TIMEOUT)
        tree2 = html.fromstring(page2.content)
        track_spotify_id_list.extend(_episode_spotify_track_ids(tree2))

    if not new_date_object_list:
        continue

    newest_episode_date_object = max(new_date_object_list)
    newest_episode_date = newest_episode_date_object.strftime('%Y-%m-%d')
    newest_index = new_date_object_list.index(newest_episode_date_object)
    newest_episode_url = new_episode_url_list[newest_index]
    # Record the newest episode handled for this show.
    bbc_update_urls_df.loc[show_rows, 'episode_url'] = newest_episode_url
    bbc_update_urls_df.loc[show_rows, 'episode_date'] = newest_episode_date

    # Insert in batches to stay within Spotify's per-request limit. Placing
    # each batch at its own offset gives the same final order as a single
    # insert at position 0.
    for start in range(0, len(track_spotify_id_list), _SPOTIFY_ADD_BATCH_SIZE):
        batch = track_spotify_id_list[start:start + _SPOTIFY_ADD_BATCH_SIZE]
        sp.playlist_add_items(playlist_id, batch, position=start)

    # Persist straight away so a failure on a later show does not cause this
    # show's tracks to be added again on the next run.
    bbc_update_urls_df.to_csv('bbc_update_urls.csv', index=False, encoding='utf-8')

bbc_update_urls_df.to_csv('bbc_update_urls.csv', index=False, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment