Skip to content

Instantly share code, notes, and snippets.

@P6g9YHK6
Last active April 22, 2024 10:03
Show Gist options
  • Save P6g9YHK6/e7b8a645fa2a84f29b5937f6819ca5fb to your computer and use it in GitHub Desktop.
Save P6g9YHK6/e7b8a645fa2a84f29b5937f6819ca5fb to your computer and use it in GitHub Desktop.
🎢 Spotify Ripper: an overly complicated and over-engineered mess of a script to export data and songs from Spotify using spotDL 💥🤘🎧
import os
import subprocess
import datetime
from mutagen.easyid3 import EasyID3
import re
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from pydub import AudioSegment
# Output directory:
output_directory = r"\\XXXXXXXXX"
#API FOR THE EXTRACTOR
SPOTIPY_CLIENT_ID = 'xxxxxxx'
SPOTIPY_CLIENT_SECRET = 'xxxxxxxxx'
SPOTIPY_REDIRECT_URI = 'https://xxxxxxxxxx'
SPOTIPY_USERNAME = 'xxxxxxxxxx'
def populate_playlists_and_weekly_url(client_id, client_secret, redirect_uri, username):
    """Collect the user's playlist URLs and the Discover Weekly URL.

    Every page of the user's playlists is fetched, not just the first one,
    so libraries larger than one API page are exported completely.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.
        redirect_uri: Redirect URI registered for the application.
        username: Spotify username passed to the OAuth manager.

    Returns:
        A ``(playlists_list, weekly_url)`` tuple. ``playlists_list`` is a
        space-separated string of playlist URLs excluding "Discover Weekly";
        ``weekly_url`` is the URL of the first playlist named
        "Discover Weekly", or ``None`` when there is none.
    """
    # Set up Spotipy client
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id,
                                                   client_secret=client_secret,
                                                   redirect_uri=redirect_uri,
                                                   scope="playlist-read-private",
                                                   username=username))
    excluded_playlist_name = 'Discover Weekly'
    playlist_urls = []
    weekly_url = None

    # Walk through all result pages of the user's playlists.
    page = sp.current_user_playlists()
    while page:
        for playlist in page['items']:
            if playlist['name'] == excluded_playlist_name:
                # Keep the first match only.
                if weekly_url is None:
                    weekly_url = playlist['external_urls']['spotify']
            else:
                playlist_urls.append(f"https://open.spotify.com/playlist/{playlist['id']}")
        page = sp.next(page) if page.get('next') else None

    return ' '.join(playlist_urls), weekly_url
def run_spotdl(output_directory, subfolder_name, spotdl_command):
    """Run a spotdl command inside ``output_directory/subfolder_name``.

    The sub-folder is created when missing and used as the working directory
    of the child process only; the working directory of this script is left
    untouched, so a relative ``output_directory`` keeps working across calls.

    Args:
        output_directory: Base download directory.
        subfolder_name: Sub-folder that receives the downloads.
        spotdl_command: The command line, either as one string or as a list
            of arguments.
    """
    print(f"\nRunning spotdl command for '{subfolder_name}'...")
    print(f"{spotdl_command}")

    # Create the sub-folder if it doesn't exist
    subfolder_path = os.path.join(output_directory, subfolder_name)
    os.makedirs(subfolder_path, exist_ok=True)

    # Without a shell, a command string with arguments only works on Windows;
    # elsewhere it has to be split into an argument list first.
    if isinstance(spotdl_command, str) and os.name != "nt":
        import shlex
        spotdl_command = shlex.split(spotdl_command)

    # Run the command with the sub-folder as its working directory
    result = subprocess.run(spotdl_command, cwd=subfolder_path)
    if result.returncode != 0:
        print(f"Warning: command exited with code {result.returncode}")
    print(f"Files downloaded to: {subfolder_path}")
def create_album_playlists(output_directory, subfolder_name):
    """Write a ``<folder>.m3u`` playlist into every sub-folder holding .mp3 files.

    All folders below ``output_directory/subfolder_name`` are visited (the top
    folder itself is not). Entries are written in sorted order so the playlist
    is the same on every run.

    Args:
        output_directory: Base download directory.
        subfolder_name: Sub-folder whose descendants are scanned.
    """
    print(f"\nCreating playlists for '{subfolder_name}'...")
    playlists_folder = os.path.join(output_directory, subfolder_name)

    for root, dirs, _files in os.walk(playlists_folder):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            # os.listdir returns entries in arbitrary order, hence the sort.
            mp3_files = sorted(
                name for name in os.listdir(folder_path) if name.endswith(".mp3")
            )

            # Only create the .m3u file if there are .mp3 files in the folder
            if not mp3_files:
                print(f"No .mp3 files found in folder '{folder}', skipping playlist creation.")
                continue

            playlist_path = os.path.join(folder_path, f"{folder}.m3u")
            with open(playlist_path, 'w', encoding='utf-8') as playlist_file:
                playlist_file.writelines(name + '\n' for name in mp3_files)
            print(f"Playlist created for folder '{folder}' at: {playlist_path}")
def find_and_create_missing_file(output_directory):
    """Merge every ``*ERR.txt`` file below *output_directory* into ``1_missing.txt``.

    For each matching file the report receives the file's full path, its
    content and a blank line. The report is (re)created in *output_directory*.
    """
    print("\nFinding and creating missing.txt file...")
    report_path = os.path.join(output_directory, '1_missing.txt')

    with open(report_path, 'w', encoding='utf-8') as report:
        for folder, _subdirs, names in os.walk(output_directory):
            for name in names:
                # Only the error logs are of interest
                if not name.endswith('ERR.txt'):
                    continue
                error_log = os.path.join(folder, name)
                # Undecodable bytes are dropped rather than aborting the report
                with open(error_log, 'r', encoding='utf-8', errors='ignore') as handle:
                    text = handle.read()
                report.write(f"{error_log}\n{text}\n\n")

    print("missing.txt file created.")
def delete_long_songs(output_directory, max_minutes=59):
    """Delete every .mp3 below *output_directory* that is longer than *max_minutes*.

    The base name of each deleted file is recorded in ``deletedERR.txt``
    inside *output_directory* (the file is recreated on every call). Files
    that cannot be processed are reported and left in place.

    Args:
        output_directory: Directory tree to scan.
        max_minutes: Longest allowed duration in minutes (default 59).
    """
    print(f"\nDeleting songs longer than {max_minutes} minutes...")

    with open(os.path.join(output_directory, 'deletedERR.txt'), 'w', encoding='utf-8') as deleted_file:
        for root, _dirs, files in os.walk(output_directory):
            for file in files:
                if not file.endswith('.mp3'):
                    continue
                full_path = os.path.join(root, file)
                try:
                    # Load the audio file using pydub
                    audio = AudioSegment.from_file(full_path)
                    # Get the duration in minutes
                    duration_minutes = len(audio) / (60 * 1000)
                    if duration_minutes > max_minutes:
                        os.remove(full_path)
                        deleted_file.write(f"{os.path.splitext(file)[0]}\n")
                        print(f"Deleted: {file}")
                except Exception as e:
                    # Best effort: one unreadable file must not stop the scan
                    print(f"Error processing '{file}': {e}")

    print("Deletion of long songs completed.")
# Each task is independent and can be commented out.
#
# To set the playlists manually instead of through the API, comment out
# Task 0 and uncomment the two assignments below:
# weekly_url = "https://open.spotify.com/playlist/XXXXXXXXXXXXXXXX"
# playlists_list = "https://open.spotify.com/playlist/XXXXXXXXXXXXXXXXXXXXXXX https://open.spotify.com/playlist/XXXXXXXXXXXXXXXXXXXXXXX"
#
# Cheat sheet - available variables for file names:
# {title}, {artists}, {artist}, {album}, {album-artist}, {genre}, {disc-number},
# {disc-count}, {duration}, {year}, {original-date}, {track-number},
# {tracks-count}, {isrc}, {track-id}, {publisher}, {list-length},
# {list-position}, {list-name}, {output-ext}

# Task 0: build playlist list
playlists_list, weekly_url = populate_playlists_and_weekly_url(SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET, SPOTIPY_REDIRECT_URI, SPOTIPY_USERNAME)
print("Generated playlists_list (excluding Discover Weekly):")
print(playlists_list)
print("Generated weekly_url:")
print(weekly_url)

# Task 1: liked_songs
subfolder_name_1 = "1_likedsongs"
formatted_output_1 = '{artists} - {title}.{output-ext}'
spotdl_command_1 = f'spotdl sync saved --format mp3 --sync-without-deleting --user-auth --playlist-numbering --save-errors likedsongsERR.txt --save-file likedsongs.spotdl --output "{formatted_output_1}" --m3u Liked_Songs.m3u '
run_spotdl(output_directory, subfolder_name_1, spotdl_command_1)

# Task 2: Discover Weekly playlist with week number and year
# isocalendar() yields (year, week, weekday); indexing works on every
# Python 3 version, the named fields only exist from 3.9 on.
iso_calendar_date = datetime.date.today().isocalendar()
formatted_week_and_year = f"Week_{iso_calendar_date[1]}_{iso_calendar_date[0]}"
print("Formatted Week and Year:", formatted_week_and_year)
subfolder_name_2 = os.path.join("1_discover_weekly", formatted_week_and_year)
formatted_output_2 = '{artists} - {title}.{output-ext}'
spotdl_command_2 = f'spotdl sync {weekly_url} --format mp3 --user-auth --playlist-numbering --save-errors discover_weeklyERR.txt --save-file discover_weekly.spotdl --output "{formatted_output_2}" --m3u {formatted_week_and_year}.m3u '
if weekly_url:
    run_spotdl(output_directory, subfolder_name_2, spotdl_command_2)
else:
    # Without a URL the command would literally be "spotdl sync None".
    print("No Discover Weekly playlist found, skipping Task 2.")

# Task 3: all-user-saved-playlists
subfolder_name_3 = "1_playlists"
formatted_output_3 = '{album}/{artists} - {title}.{output-ext}'
spotdl_command_3 = f'spotdl sync {playlists_list} --format mp3 --sync-without-deleting --user-auth --playlist-numbering --save-errors playlistsERR.txt --save-file playlists.spotdl --output "{formatted_output_3}" '
if playlists_list:
    run_spotdl(output_directory, subfolder_name_3, spotdl_command_3)
    # Create playlists for each album in the "playlists" folder;
    # --playlist-numbering allows doing this.
    create_album_playlists(output_directory, subfolder_name_3)
else:
    print("No playlists found, skipping Task 3.")

# Task 4: all-user-followed-artists
subfolder_name_4 = "artists"
formatted_output_4 = '{album-artist}/{album}/{artists} - {title}.{output-ext}'
spotdl_command_4 = f'spotdl sync all-user-followed-artists --format mp3 --sync-without-deleting --user-auth --save-errors artistsERR.txt --save-file artists.spotdl --output "{formatted_output_4}" '
run_spotdl(output_directory, subfolder_name_4, spotdl_command_4)

# Task 5: all-user-saved-albums (shares the "artists" folder with Task 4)
subfolder_name_5 = "artists"
formatted_output_5 = '{album-artist}/{album}/{artists} - {title}.{output-ext}'
spotdl_command_5 = f'spotdl sync all-user-saved-albums --format mp3 --sync-without-deleting --user-auth --save-errors albumsERR.txt --save-file albums.spotdl --output "{formatted_output_5}" '
run_spotdl(output_directory, subfolder_name_5, spotdl_command_5)

# Task 6: clean up bad downloads and compile error logs
#delete_long_songs(output_directory)
find_and_create_missing_file(output_directory)
import os
import shutil
from mutagen.easyid3 import EasyID3
def move_mp3_to_album_folders(src_folder, dest_folder):
    """Move each .mp3 directly inside *src_folder* into ``dest_folder/<album>``.

    The album name comes from ``get_album_name``; files without one are left
    where they are. Characters that are invalid in file names are replaced
    with ``_`` so the album always maps to exactly one folder.

    Args:
        src_folder: Folder holding the .mp3 files (not scanned recursively).
        dest_folder: Folder in which the album folders are created.
    """
    # Same character set the downloader in this file replaces in folder names.
    invalid_chars = '<>:"/\\|?*'

    for filename in os.listdir(src_folder):
        if not filename.endswith('.mp3'):
            continue
        file_path = os.path.join(src_folder, filename)
        album_name = get_album_name(file_path)
        if not album_name:
            continue

        # Remove or replace invalid characters in album_name
        for char in invalid_chars:
            album_name = album_name.replace(char, '_')
        album_name = album_name.rstrip()
        if not album_name:
            # Nothing usable left; do not dump the file into dest_folder itself.
            continue

        album_folder = os.path.join(dest_folder, album_name)
        try:
            os.makedirs(album_folder, exist_ok=True)
            shutil.move(file_path, os.path.join(album_folder, filename))
            print(f"Moved {filename} to {album_folder}")
        except Exception as e:
            print(f"Error moving {filename} to {album_folder}: {e}")
def get_album_name(file_path):
    """Return the first ``album`` tag value of *file_path*, or ``None``.

    ``None`` is returned when the tag is absent or when reading the tags
    fails; in the latter case the error is printed.
    """
    try:
        tags = EasyID3(file_path)
        album = tags['album'][0] if 'album' in tags else None
    except Exception as e:
        print(f"Error reading metadata for {file_path}: {e}")
        album = None
    return album
if __name__ == "__main__":
    # Fill in both folders before running the script.
    source_folder = r""
    destination_folder = r""
    if source_folder and destination_folder:
        move_mp3_to_album_folders(source_folder, destination_folder)
    else:
        # An empty path would only fail later inside os.listdir.
        print("Set source_folder and destination_folder before running.")
import os
import shutil
from mutagen.easyid3 import EasyID3
def organize_music_by_artist(src_folder, dest_folder):
    """Move every .mp3 below *src_folder* into ``dest_folder/<album artist>/<album>``.

    Both names come from ``get_album_info``; files missing either one are
    left in place. Characters that are invalid in file names are replaced
    with ``_`` so a tag value can never add extra folder levels.

    Args:
        src_folder: Folder tree that is scanned recursively.
        dest_folder: Folder in which the artist folders are created.
    """
    # Same character set the downloader in this file replaces in folder names.
    invalid_chars = '<>:"/\\|?*'

    def safe_name(name):
        """Return *name* made usable as a single folder name."""
        for char in invalid_chars:
            name = name.replace(char, '_')
        return name.rstrip()

    for root, _dirs, files in os.walk(src_folder):
        for filename in files:
            if not filename.endswith('.mp3'):
                continue
            file_path = os.path.join(root, filename)
            album_artist, album = get_album_info(file_path)
            if not (album_artist and album):
                continue

            artist_dir = safe_name(album_artist)
            album_dir = safe_name(album)
            if not (artist_dir and album_dir):
                continue

            album_folder = os.path.join(dest_folder, artist_dir, album_dir)
            try:
                os.makedirs(album_folder, exist_ok=True)
                shutil.move(file_path, os.path.join(album_folder, filename))
                print(f"Moved {filename} to {album_folder}")
            except Exception as e:
                print(f"Error moving {filename} to {album_folder}: {e}")
def get_album_info(file_path):
    """Return ``(album_artist, album)`` read from the tags of *file_path*.

    A missing tag yields ``None`` for that element. If the tags cannot be
    read, the error is printed and ``(None, None)`` is returned.
    """
    try:
        tags = EasyID3(file_path)
        info = tuple(tags.get(key, [None])[0] for key in ('albumartist', 'album'))
    except Exception as e:
        print(f"Error reading metadata for {file_path}: {e}")
        info = (None, None)
    return info
if __name__ == "__main__":
    # Fill in both folders before running the script.
    source_folder = r""
    destination_folder = r""
    if source_folder and destination_folder:
        organize_music_by_artist(source_folder, destination_folder)
    else:
        # With an empty source path os.walk would silently find nothing.
        print("Set source_folder and destination_folder before running.")

Spotify Music Ripper

I created this tool because Lidarr wasn't working for me. I use Spotify as a dashboard to control the downloads, the .py script to download the music, and Navidrome to listen to it.

All of which seems to be the least problematic FOSS/self-hosted/homelab system for unconventional music tastes.

Functionality:

Downloads liked songs, all saved playlists, saved albums, followed artists, and the current week's Discover Weekly, and keeps them organised for long-term storage. Generates .m3u files for Navidrome/Jellyfin and lists missing items in a .txt file.

Current State:

Functional

TODO:

  • Determine how to handle missing.txt to automate downloads.
  • Investigate why it doesn't run in headless mode—possibly requires API tokens.
  • Find a solution for issue 1970 regarding 10h/1h files.
  • Until issue 2000 is fixed, --m3u {list}.m3u is not applied to playlist downloads. Once it is fixed, the playlist-creation functions can be removed (would it play nicely with subfolders?).
  • If this issue is resolved, remove the API scraper and reset the weekly manually.
  • Troubleshoot --sponsor-block: check why it causes an ffmpeg issue; it would be nice to have it as the default everywhere in any case.
  • automate the variables
  • Set up the requirements in the script.

How to use:

then:

Fill in the variables and run the script.

import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from datetime import datetime
# Set your Spotify API credentials
SPOTIPY_CLIENT_ID = ''
SPOTIPY_CLIENT_SECRET = ''
SPOTIPY_REDIRECT_URI = ''
# Set your Spotify username
SPOTIPY_USERNAME = ''

# Specify the folder to store text files
OUTPUT_FOLDER = 'spotifyrippingmachine'
# Create the folder if it doesn't exist
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Create a separate folder for Discover Weekly
DISCOVER_WEEKLY_FOLDER = os.path.join(OUTPUT_FOLDER, 'Discover Weekly')
os.makedirs(DISCOVER_WEEKLY_FOLDER, exist_ok=True)

# Scope for necessary permissions
SCOPE = 'user-library-read playlist-read-private user-follow-read'

# Remove the token cache so every run authenticates from scratch.
# spotipy's default cache file is ".cache-<username>" (or ".cache" without a
# username) in the current working directory; the path inside OUTPUT_FOLDER
# is also cleaned in case an earlier setup left a file there.
token_cache_path = f'.cache-{SPOTIPY_USERNAME}' if SPOTIPY_USERNAME else '.cache'
for _cache_file in (token_cache_path, os.path.join(OUTPUT_FOLDER, '.cache-username')):
    if os.path.exists(_cache_file):
        os.remove(_cache_file)

# Authenticate using SpotifyOAuth
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=SPOTIPY_CLIENT_ID,
                                               client_secret=SPOTIPY_CLIENT_SECRET,
                                               redirect_uri=SPOTIPY_REDIRECT_URI,
                                               scope=SCOPE,
                                               username=SPOTIPY_USERNAME))
def get_names_and_urls(items):
    """Return ``(name, spotify_url)`` pairs for the given Spotify objects.

    Entries that are ``None`` (e.g. playlist tracks that are no longer
    available) and entries without a ``spotify`` external URL (e.g. local
    files) are skipped instead of raising.

    Args:
        items: Iterable of dicts carrying ``name`` and ``external_urls``.

    Returns:
        List of ``(name, url)`` tuples in input order.
    """
    pairs = []
    for item in items:
        if not item:
            continue
        url = (item.get('external_urls') or {}).get('spotify')
        if url is None:
            continue
        pairs.append((item['name'], url))
    return pairs
def write_to_file(folder, filename, data):
    """Write each ``(name, url)`` pair of *data* as a ``name: url`` line.

    The file ``folder/filename`` is created or overwritten, encoded as UTF-8.
    """
    target = os.path.join(folder, filename)
    with open(target, 'w', encoding='utf-8') as out:
        out.writelines(f"{name}: {url}\n" for name, url in data)
def get_all_items(api_call):
    """Collect the items of every result page of a Spotify API call.

    Args:
        api_call: Zero-argument callable returning the first page. The page
            is either a paging dict (``items`` / ``next``) or a dict that
            wraps the paging dict under ``artists``. A plain list or tuple
            is returned as a list; ``None`` yields an empty list.

    Returns:
        List with the items of all pages, in order.
    """
    response = api_call()
    if response is None:
        return []
    if not isinstance(response, dict):
        # Call without paging: the result already is the collection.
        return list(response)

    all_items = []
    # Followed artists come wrapped as {'artists': {...paging...}}.
    wrapped = 'artists' in response
    while response:
        page = response['artists'] if wrapped else response
        all_items.extend(page.get('items', []))
        # Follow the "next" link until the last page is reached.
        response = sp.next(page) if page.get('next') else None
    return all_items
# Get and write playlists
playlists = get_all_items(lambda: sp.current_user_playlists())

# The list of all playlists is written once, and only when there is at least
# one playlist besides Discover Weekly.
if any(playlist['name'] != 'Discover Weekly' for playlist in playlists):
    playlist_data = get_names_and_urls(playlists)
    write_to_file(OUTPUT_FOLDER, 'playlists.txt', playlist_data)

# Discover Weekly gets its own file named after the current year and week.
# The date is computed up front so it is defined even without that playlist.
current_date = datetime.now().strftime('%Y-%W')
discover_weekly_file = os.path.join(DISCOVER_WEEKLY_FOLDER, f'{current_date}.txt')
discover_weekly_written = False
for playlist in playlists:
    if playlist['name'] != 'Discover Weekly':
        continue
    # Write the songs and links to the Discover Weekly file
    tracks = get_all_items(lambda: sp.playlist_tracks(playlist['id']))
    # Entries whose track is missing are skipped.
    track_data = get_names_and_urls([track['track'] for track in tracks if track.get('track')])
    write_to_file(DISCOVER_WEEKLY_FOLDER, f'{current_date}.txt', track_data)
    discover_weekly_written = True

# Get and write liked songs
liked_songs = get_all_items(lambda: sp.current_user_saved_tracks())
liked_songs_data = get_names_and_urls([track['track'] for track in liked_songs if track.get('track')])
write_to_file(OUTPUT_FOLDER, 'likedsongs.txt', liked_songs_data)

# Get and write albums
albums = get_all_items(lambda: sp.current_user_saved_albums())
album_data = get_names_and_urls([album['album'] for album in albums if album.get('album')])
write_to_file(OUTPUT_FOLDER, 'albums.txt', album_data)

# Get and write artists
artists = get_all_items(lambda: sp.current_user_followed_artists())
artist_data = get_names_and_urls(artists)
write_to_file(OUTPUT_FOLDER, 'artists.txt', artist_data)

print(f"Data has been written to {OUTPUT_FOLDER}/playlists.txt, {OUTPUT_FOLDER}/albums.txt, {OUTPUT_FOLDER}/artists.txt, and {OUTPUT_FOLDER}/likedsongs.txt.")
if discover_weekly_written:
    print(f"Discover Weekly data has been written to {DISCOVER_WEEKLY_FOLDER}/{current_date}.txt.")
import os
import subprocess
from datetime import datetime
def _safe_folder_name(name):
    """Replace characters that are invalid in file names and trim trailing spaces."""
    for char in ['<', '>', ':', '"', '/', '\\', '|', '?', '*']:
        name = name.replace(char, '_')
    return name.rstrip()


def _write_m3u(playlist_path, music_dir):
    """Write the names of the .mp3 files directly inside *music_dir* to *playlist_path*."""
    with open(playlist_path, 'w', encoding='utf-8-sig') as playlist_file:
        try:
            entries = sorted(os.listdir(music_dir))
        except FileNotFoundError:
            print(f"Error: Could not find files in the subfolder: {music_dir}")
            return
        for entry in entries:
            entry_path = os.path.join(music_dir, entry)
            if entry.lower().endswith('.mp3') and os.path.isfile(entry_path):
                # Write only the name of the .mp3 file to the playlist
                playlist_file.write(f"{entry}\n")
                print(f"Imported: {entry_path}")


def _download_all(lines, target_dir, skip_prefix=None):
    """Run a single ``spotdl download`` in *target_dir* for the URLs found in *lines*."""
    urls = []
    for line in lines:
        if skip_prefix is not None and line.startswith(skip_prefix):
            continue
        url = line.strip().rsplit(': ', 1)[-1]
        if url:  # blank lines carry no URL
            urls.append(url)
    os.makedirs(target_dir, exist_ok=True)
    if urls:
        subprocess.run(['spotdl', 'download'] + urls, cwd=target_dir)


def create_folders(input_dir, output_dir):
    """Download everything listed in the ``name: url`` files of *input_dir*.

    For every ``<list>.txt`` file a folder ``output_dir/<list>`` is created
    and its old .m3u files are removed. Then:

    * ``likedsongs.txt``: all URLs are downloaded into ``output_dir/likedsongs``.
    * ``playlist.txt``: all URLs except the "Discover Weekly" line are
      downloaded into ``output_dir/playlist``.
    * any other file: each line is downloaded into its own sub-folder of
      ``output_dir/<list>``, named after the text before the URL.

    A ``<list>.m3u`` playlist naming the downloaded .mp3 files is written
    next to the music, so the bare file names inside it resolve. The lists
    ``albums`` and ``artists`` get no playlist. spotdl runs with the target
    folder as its working directory; the working directory of this process
    is not changed.

    Args:
        input_dir: Folder containing the ``.txt`` list files.
        output_dir: Folder receiving the downloads and the log file.
    """
    # Get the current date and time for the log file name
    current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
    log_path = os.path.join(output_dir, f"log{current_datetime}.txt")

    # List all list files in the input directory
    files = [f for f in os.listdir(input_dir) if f.endswith('.txt')]

    with open(log_path, 'w') as log_file:
        log_file.write(f"Execution started at: {current_datetime}\n")

    for file in files:
        file_path = os.path.join(input_dir, file)

        # Create a folder with the name of the file
        folder_name = os.path.splitext(file)[0]
        folder_path = os.path.join(output_dir, folder_name)
        os.makedirs(folder_path, exist_ok=True)

        # Remove existing .m3u files in the folder
        for existing in os.listdir(folder_path):
            if existing.endswith('.m3u'):
                os.remove(os.path.join(folder_path, existing))

        # Read the content of the file
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        playlist_name = f"{folder_name}.m3u"

        if file == 'likedsongs.txt':
            subfolder_path = os.path.join(output_dir, 'likedsongs')
            _download_all(lines, subfolder_path)
            _write_m3u(os.path.join(subfolder_path, playlist_name), subfolder_path)
        elif file == 'playlist.txt':
            # NOTE(review): the exporter in this gist writes 'playlists.txt',
            # which is handled by the per-line branch below - confirm which
            # file name is intended.
            subfolder_path = os.path.join(output_dir, 'playlist')
            _download_all(lines, subfolder_path, skip_prefix='Discover Weekly:')
            _write_m3u(os.path.join(subfolder_path, playlist_name), subfolder_path)
        else:
            wants_playlist = folder_name.lower() not in ['albums', 'artists']
            for line in lines:
                line_parts = line.strip().rsplit(': ', 1)
                # Check if the line has the required number of elements
                if len(line_parts) != 2:
                    print(f"Skipping invalid line: {line}")
                    continue

                # One sub-folder per entry; the name is cleaned before the
                # folder is created.
                subfolder_name = _safe_folder_name(line_parts[0])
                subfolder_path = os.path.join(folder_path, subfolder_name)
                os.makedirs(subfolder_path, exist_ok=True)

                # Run spotdl on the URL
                subprocess.run(['spotdl', 'download', line_parts[1]], cwd=subfolder_path)

                if wants_playlist:
                    _write_m3u(os.path.join(subfolder_path, playlist_name), subfolder_path)

    print(f"Processing complete. Log file created at: {log_path}")
if __name__ == "__main__":
    # Folder holding the exported .txt lists, and folder receiving the downloads
    input_directory = r"XXXXXXXXXXX"
    output_directory = r"XXXXXXX"
    create_folders(input_dir=input_directory, output_dir=output_directory)
@P6g9YHK6
Copy link
Author

does this currently work?

i made a more formal repo with more features there and a couple more updates.
https://github.com/P6g9YHK6/SpotifyRipper

still in its infancy until i finish the automation part.
but i have to say as it is working in its current state without tinkering the motivation to move forward is quite low :D
if the userbase grows maybe i will find the will to get it to a state more feature complete

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment