shijithpk · January 24, 2024 20:49
diff --git a/bbc_update.py b/bbc_update.py
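 # Sample contents of bbc_update_urls.csv, the state file this script reads at startup and rewrites with the newest episode seen for each show: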
 # show,episode_url,episode_date
 # radio2_folk_show,https://www.bbc.co.uk/sounds/play/m001v4yp,2024-01-17
 # radio6_new_music,https://www.bbc.co.uk/sounds/play/m001v50p,2024-01-19
 # radio3_music_planet,https://www.bbc.co.uk/sounds/play/m001v3yn,2024-01-13
 # radio1_best_new_pop,https://www.bbc.co.uk/sounds/play/m001v4rp,2024-01-19
 # radio1_rock_show,https://www.bbc.co.uk/sounds/play/m001v2mp,2024-01-15
 # radio6_the_morning_after_mix,https://www.bbc.co.uk/sounds/play/m001v4jv,2024-01-14
 # radio1_the_chillest_show,https://www.bbc.co.uk/sounds/play/m001v2r0,2024-01-14
 # radio6_gilles_peterson_show,https://www.bbc.co.uk/sounds/play/m001v4hp,2024-01-13

 #below is the actual script 

 import requests
 import re 
 import spotipy
 from spotipy.oauth2 import SpotifyOAuth
 import cred
 import time
 import pandas as pd
 from lxml import html
 import json
 from datetime import datetime
 import random

 # For each show in show_dict: trim the playlist once it holds 9960 or more
 # items, scrape the show's BBC Sounds page for episodes newer than the one
 # recorded in the CSV, collect the Spotify track ids listed for those episodes,
 # insert them at the top of the show's playlist and record the newest episode
 # back into the CSV.

 # Sleep for a random 0-180 seconds before touching the network.
 # NOTE(review): presumably this staggers scheduled runs - confirm.
 delay = random.randint(0, 180)
 time.sleep(delay)

 scope = "playlist-read-private playlist-modify-private playlist-modify-public user-library-read"
 sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=cred.client_id , client_secret= cred.client_secret ,redirect_uri=cred.redirect_url, scope=scope, open_browser=False))

 # Every key here must also appear in the 'show' column of the CSV; 'url_part'
 # is appended to the BBC Sounds brand URL and 'playlist_id' is the Spotify
 # playlist that receives the show's tracks.
 show_dict = {
     'radio1_best_new_pop':{'url_part':'b07zc3js','playlist_id':'3t7laH7XOcbCFN1oItqdHo'},
     'radio2_folk_show':{'url_part':'b01phglj','playlist_id':'5DjFGSMemnQlto1iopbULA'},
     'radio3_music_planet':{'url_part':'b09ymx3v','playlist_id':'197QzBV7LFn6o4rFbUe1kJ'},
     'radio1_rock_show':{'url_part':'b006wq4s','playlist_id':'2rjHoY4rckW70ChgsK1JUc'},
     'radio6_the_morning_after_mix':{'url_part':'p071z8z5','playlist_id':'3OFAXHgkjxsJ1tGBBzKWCt'},
     'radio1_the_chillest_show':{'url_part':'b03hjfww','playlist_id':'4x1Qroq1N7F4mwsfKC2oPJ'},
     'radio6_gilles_peterson_show':{'url_part':'b01fm4ss','playlist_id':'2hh8x1bPsgPpxwhIV0muAn'},
 }

 CSV_PATH = 'bbc_update_urls.csv'
 # Without a timeout requests.get can block forever on a stalled connection.
 REQUEST_TIMEOUT_SECONDS = 30
 # Spotify accepts at most 100 items per add-to-playlist request.
 SPOTIFY_MAX_ITEMS_PER_REQUEST = 100
 # Compiled once instead of on every loop iteration.
 PRELOADED_STATE_RE = re.compile(r" window.__PRELOADED_STATE__ = (.*); ")
 SPOTIFY_TRACK_RE = re.compile(r"https://open.spotify.com/track/(.*)")

 bbc_update_urls_df = pd.read_csv(CSV_PATH)

 for show in show_dict:
     show_rows = bbc_update_urls_df['show'] == show
     # Raises IndexError if the show has no row in the CSV.
     last_week_episode_date_raw = bbc_update_urls_df.loc[show_rows, 'episode_date'].values[0]
     last_week_episode_date_object = datetime.strptime(last_week_episode_date_raw, '%Y-%m-%d')

     playlist_id = show_dict[show]['playlist_id']

     # Page through the whole playlist to count its items, pausing 5 seconds
     # between pages.
     results = sp.playlist_items(playlist_id, offset=0, market='IN')
     items = results['items']
     while results['next']:
         time.sleep(5)
         results = sp.next(results)
         items.extend(results['items'])

     # Delete the last 40 items once the playlist holds 9960 or more. New tracks
     # are inserted at the top, so the oldest ones sit at the bottom.
     # NOTE(review): the call below removes every occurrence of these ids, so a
     # track that also appears higher up is removed there as well - confirm
     # that this is intended.
     if len(items) >= 9960:
         last_40_ids = []
         for item in items[-40:]:
             # 'track' can be missing/null and 'id' can be null; such entries
             # cannot be addressed by id, so leave them alone.
             track = item.get('track')
             if track and track.get('id'):
                 last_40_ids.append(track['id'])

         if last_40_ids:
             sp.playlist_remove_all_occurrences_of_items(playlist_id, last_40_ids)

     track_spotify_id_list = []

     show_url = 'https://www.bbc.co.uk/sounds/brand/' + show_dict[show]['url_part']
     page = requests.get(show_url, timeout=REQUEST_TIMEOUT_SECONDS)
     tree = html.fromstring(page.content)
     list_of_episodes = tree.xpath("//a[contains(@class,'sc-c-playable-list-card__link')]")

     # (release date, episode url) for every episode newer than the CSV entry.
     new_episodes = []

     for episode in list_of_episodes:
         episode_url = 'https://www.bbc.co.uk' + episode.xpath("./@href")[0]
         episode_date_raw = episode.xpath(".//li[contains(@aria-label,'release date')]/text()")[0]
         episode_date_object = datetime.strptime(episode_date_raw, '%d %b %Y')

         if episode_date_object <= last_week_episode_date_object:
             continue

         # The episode counts as seen even if no tracks can be read from it.
         new_episodes.append((episode_date_object, episode_url))

         page2 = requests.get(episode_url, timeout=REQUEST_TIMEOUT_SECONDS)
         tree2 = html.fromstring(page2.content)
         try:
             script_text = tree2.xpath("//script[contains(text(),'commercial-music-service-spotify')]/text()")[0]
         except IndexError:
             # No script mentioning Spotify on this page: nothing to collect.
             continue

         state_match = PRELOADED_STATE_RE.match(script_text)
         if state_match is None:
             print('Skipping episode, preloaded state not found:', episode_url)
             continue
         try:
             tracklist = json.loads(state_match.group(1))['tracklist']['tracks']
         except (json.JSONDecodeError, KeyError, TypeError):
             print('Skipping episode, tracklist could not be read:', episode_url)
             continue

         for track in tracklist:
             # Take the first Spotify link among the track's URIs; tracks with
             # no usable Spotify link are skipped.
             try:
                 for uri_entry in track['uris']:
                     track_match = SPOTIFY_TRACK_RE.match(uri_entry['uri'])
                     if track_match:
                         track_spotify_id_list.append(track_match.group(1))
                         break
             except (KeyError, TypeError):
                 continue

     if new_episodes:
         # max() returns the first entry with the latest date.
         newest_episode_date_object, newest_episode_url = max(new_episodes, key=lambda entry: entry[0])
         newest_episode_date = newest_episode_date_object.strftime('%Y-%m-%d')

         # Insert in batches of at most 100; giving each batch its own offset
         # keeps the tracks in the same order as one insert at position 0.
         for start in range(0, len(track_spotify_id_list), SPOTIFY_MAX_ITEMS_PER_REQUEST):
             batch = track_spotify_id_list[start:start + SPOTIFY_MAX_ITEMS_PER_REQUEST]
             sp.playlist_add_items(playlist_id, batch, position=start)

         # Update the CSV row only after the tracks were added, and save right
         # away so a failure on a later show cannot cause this show's tracks to
         # be added again on the next run.
         bbc_update_urls_df.loc[show_rows, 'episode_url'] = newest_episode_url
         bbc_update_urls_df.loc[show_rows, 'episode_date'] = newest_episode_date
         bbc_update_urls_df.to_csv(CSV_PATH, index=False, encoding='utf-8')

 bbc_update_urls_df.to_csv(CSV_PATH, index=False, encoding='utf-8')
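	# Sample contents of bbc_update_urls.csv, the state file this script reads at startup and rewrites with the newest episode seen for each show: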
	# show,episode_url,episode_date
	# radio2_folk_show,https://www.bbc.co.uk/sounds/play/m001v4yp,2024-01-17
	# radio6_new_music,https://www.bbc.co.uk/sounds/play/m001v50p,2024-01-19
	# radio3_music_planet,https://www.bbc.co.uk/sounds/play/m001v3yn,2024-01-13
	# radio1_best_new_pop,https://www.bbc.co.uk/sounds/play/m001v4rp,2024-01-19
	# radio1_rock_show,https://www.bbc.co.uk/sounds/play/m001v2mp,2024-01-15
	# radio6_the_morning_after_mix,https://www.bbc.co.uk/sounds/play/m001v4jv,2024-01-14
	# radio1_the_chillest_show,https://www.bbc.co.uk/sounds/play/m001v2r0,2024-01-14
	# radio6_gilles_peterson_show,https://www.bbc.co.uk/sounds/play/m001v4hp,2024-01-13

	#below is the actual script

	import requests
	import re
	import spotipy
	from spotipy.oauth2 import SpotifyOAuth
	import cred
	import time
	import pandas as pd
	from lxml import html
	import json
	from datetime import datetime
	import random

	# For each show in show_dict: trim the playlist once it holds 9960 or more
	# items, scrape the show's BBC Sounds page for episodes newer than the one
	# recorded in the CSV, collect the Spotify track ids listed for those episodes,
	# insert them at the top of the show's playlist and record the newest episode
	# back into the CSV.

	# Sleep for a random 0-180 seconds before touching the network.
	# NOTE(review): presumably this staggers scheduled runs - confirm.
	delay = random.randint(0, 180)
	time.sleep(delay)

	scope = "playlist-read-private playlist-modify-private playlist-modify-public user-library-read"
	sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=cred.client_id , client_secret= cred.client_secret ,redirect_uri=cred.redirect_url, scope=scope, open_browser=False))

	# Every key here must also appear in the 'show' column of the CSV; 'url_part'
	# is appended to the BBC Sounds brand URL and 'playlist_id' is the Spotify
	# playlist that receives the show's tracks.
	show_dict = {
	    'radio1_best_new_pop':{'url_part':'b07zc3js','playlist_id':'3t7laH7XOcbCFN1oItqdHo'},
	    'radio2_folk_show':{'url_part':'b01phglj','playlist_id':'5DjFGSMemnQlto1iopbULA'},
	    'radio3_music_planet':{'url_part':'b09ymx3v','playlist_id':'197QzBV7LFn6o4rFbUe1kJ'},
	    'radio1_rock_show':{'url_part':'b006wq4s','playlist_id':'2rjHoY4rckW70ChgsK1JUc'},
	    'radio6_the_morning_after_mix':{'url_part':'p071z8z5','playlist_id':'3OFAXHgkjxsJ1tGBBzKWCt'},
	    'radio1_the_chillest_show':{'url_part':'b03hjfww','playlist_id':'4x1Qroq1N7F4mwsfKC2oPJ'},
	    'radio6_gilles_peterson_show':{'url_part':'b01fm4ss','playlist_id':'2hh8x1bPsgPpxwhIV0muAn'},
	}

	CSV_PATH = 'bbc_update_urls.csv'
	# Without a timeout requests.get can block forever on a stalled connection.
	REQUEST_TIMEOUT_SECONDS = 30
	# Spotify accepts at most 100 items per add-to-playlist request.
	SPOTIFY_MAX_ITEMS_PER_REQUEST = 100
	# Compiled once instead of on every loop iteration.
	PRELOADED_STATE_RE = re.compile(r" window.__PRELOADED_STATE__ = (.*); ")
	SPOTIFY_TRACK_RE = re.compile(r"https://open.spotify.com/track/(.*)")

	bbc_update_urls_df = pd.read_csv(CSV_PATH)

	for show in show_dict:
	    show_rows = bbc_update_urls_df['show'] == show
	    # Raises IndexError if the show has no row in the CSV.
	    last_week_episode_date_raw = bbc_update_urls_df.loc[show_rows, 'episode_date'].values[0]
	    last_week_episode_date_object = datetime.strptime(last_week_episode_date_raw, '%Y-%m-%d')

	    playlist_id = show_dict[show]['playlist_id']

	    # Page through the whole playlist to count its items, pausing 5 seconds
	    # between pages.
	    results = sp.playlist_items(playlist_id, offset=0, market='IN')
	    items = results['items']
	    while results['next']:
	        time.sleep(5)
	        results = sp.next(results)
	        items.extend(results['items'])

	    # Delete the last 40 items once the playlist holds 9960 or more. New tracks
	    # are inserted at the top, so the oldest ones sit at the bottom.
	    # NOTE(review): the call below removes every occurrence of these ids, so a
	    # track that also appears higher up is removed there as well - confirm
	    # that this is intended.
	    if len(items) >= 9960:
	        last_40_ids = []
	        for item in items[-40:]:
	            # 'track' can be missing/null and 'id' can be null; such entries
	            # cannot be addressed by id, so leave them alone.
	            track = item.get('track')
	            if track and track.get('id'):
	                last_40_ids.append(track['id'])

	        if last_40_ids:
	            sp.playlist_remove_all_occurrences_of_items(playlist_id, last_40_ids)

	    track_spotify_id_list = []

	    show_url = 'https://www.bbc.co.uk/sounds/brand/' + show_dict[show]['url_part']
	    page = requests.get(show_url, timeout=REQUEST_TIMEOUT_SECONDS)
	    tree = html.fromstring(page.content)
	    list_of_episodes = tree.xpath("//a[contains(@class,'sc-c-playable-list-card__link')]")

	    # (release date, episode url) for every episode newer than the CSV entry.
	    new_episodes = []

	    for episode in list_of_episodes:
	        episode_url = 'https://www.bbc.co.uk' + episode.xpath("./@href")[0]
	        episode_date_raw = episode.xpath(".//li[contains(@aria-label,'release date')]/text()")[0]
	        episode_date_object = datetime.strptime(episode_date_raw, '%d %b %Y')

	        if episode_date_object <= last_week_episode_date_object:
	            continue

	        # The episode counts as seen even if no tracks can be read from it.
	        new_episodes.append((episode_date_object, episode_url))

	        page2 = requests.get(episode_url, timeout=REQUEST_TIMEOUT_SECONDS)
	        tree2 = html.fromstring(page2.content)
	        try:
	            script_text = tree2.xpath("//script[contains(text(),'commercial-music-service-spotify')]/text()")[0]
	        except IndexError:
	            # No script mentioning Spotify on this page: nothing to collect.
	            continue

	        state_match = PRELOADED_STATE_RE.match(script_text)
	        if state_match is None:
	            print('Skipping episode, preloaded state not found:', episode_url)
	            continue
	        try:
	            tracklist = json.loads(state_match.group(1))['tracklist']['tracks']
	        except (json.JSONDecodeError, KeyError, TypeError):
	            print('Skipping episode, tracklist could not be read:', episode_url)
	            continue

	        for track in tracklist:
	            # Take the first Spotify link among the track's URIs; tracks with
	            # no usable Spotify link are skipped.
	            try:
	                for uri_entry in track['uris']:
	                    track_match = SPOTIFY_TRACK_RE.match(uri_entry['uri'])
	                    if track_match:
	                        track_spotify_id_list.append(track_match.group(1))
	                        break
	            except (KeyError, TypeError):
	                continue

	    if new_episodes:
	        # max() returns the first entry with the latest date.
	        newest_episode_date_object, newest_episode_url = max(new_episodes, key=lambda entry: entry[0])
	        newest_episode_date = newest_episode_date_object.strftime('%Y-%m-%d')

	        # Insert in batches of at most 100; giving each batch its own offset
	        # keeps the tracks in the same order as one insert at position 0.
	        for start in range(0, len(track_spotify_id_list), SPOTIFY_MAX_ITEMS_PER_REQUEST):
	            batch = track_spotify_id_list[start:start + SPOTIFY_MAX_ITEMS_PER_REQUEST]
	            sp.playlist_add_items(playlist_id, batch, position=start)

	        # Update the CSV row only after the tracks were added, and save right
	        # away so a failure on a later show cannot cause this show's tracks to
	        # be added again on the next run.
	        bbc_update_urls_df.loc[show_rows, 'episode_url'] = newest_episode_url
	        bbc_update_urls_df.loc[show_rows, 'episode_date'] = newest_episode_date
	        bbc_update_urls_df.to_csv(CSV_PATH, index=False, encoding='utf-8')

	bbc_update_urls_df.to_csv(CSV_PATH, index=False, encoding='utf-8')