Last active April 22, 2024
🎢 Spotify Ripper: an overly complicated and overengineered mess of a script to export data and songs from Spotify using spotdl 💥🤘🎧
import os
import subprocess
import datetime
from mutagen.easyid3 import EasyID3
import re
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from pydub import AudioSegment
# Output directory: root folder that receives every download subfolder.
output_directory = r"\\XXXXXXXXX"

# Spotify API credentials. They are read from the environment when available
# so that real secrets do not have to be committed with the script; the
# placeholder values must otherwise be replaced by hand.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'xxxxxxxxxx')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'xxxxxxxxxx')
SPOTIPY_REDIRECT_URI = 'https://xxxxxxxxxx'
SPOTIPY_USERNAME = 'xxxxxxxxxx'
def populate_playlists_and_weekly_url(client_id, client_secret, redirect_uri, username):
    """Return the user's playlist ids and the Discover Weekly URL.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.
        redirect_uri: Redirect URI registered for the Spotify application.
        username: Spotify user name used by the OAuth manager.

    Returns:
        A ``(playlists_list, weekly_url)`` tuple. ``playlists_list`` is a
        space-separated string with the id of every playlist except
        "Discover Weekly"; ``weekly_url`` is the Spotify URL of the
        "Discover Weekly" playlist, or ``None`` when the user has no
        playlist with that name.
    """
    # Set up Spotipy client.
    # NOTE(review): the original scope argument was lost; 'playlist-read-private'
    # is assumed because only the user's playlists are read here - confirm.
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        username=username,
        scope='playlist-read-private',
    ))

    # Get user's playlists. The endpoint is paginated, so follow the "next"
    # links instead of stopping after the first page.
    playlist_items = []
    page = sp.current_user_playlists()
    while page:
        playlist_items.extend(page['items'])
        page = sp.next(page) if page.get('next') else None

    # Exclude Discover Weekly playlist
    excluded_playlist_name = 'Discover Weekly'
    playlists_list = ' '.join(
        playlist['id']
        for playlist in playlist_items
        if playlist['name'] != excluded_playlist_name
    )

    # Get the URL for the user's Discover Weekly playlist
    weekly_url = next(
        (
            playlist['external_urls']['spotify']
            for playlist in playlist_items
            if playlist['name'] == excluded_playlist_name
        ),
        None,
    )
    return playlists_list, weekly_url
def run_spotdl(output_directory, subfolder_name, spotdl_command):
    """Run a spotdl command inside ``output_directory/subfolder_name``.

    Args:
        output_directory: Root folder for all downloads.
        subfolder_name: Subfolder (created when missing) used as the working
            directory of the command.
        spotdl_command: Command line to run, either as a single string or as
            a sequence of arguments.
    """
    import shlex

    print(f"\nRunning spotdl command for '{subfolder_name}'...")
    # Create the full path to the subfolder
    subfolder_path = os.path.join(output_directory, subfolder_name)
    # Create the subfolder if it doesn't exist
    os.makedirs(subfolder_path, exist_ok=True)

    # A string is handed to the OS unchanged on Windows (which parses the
    # quoting itself) and split into arguments elsewhere, so no shell is
    # involved in either case.
    if isinstance(spotdl_command, str) and os.name != 'nt':
        command = shlex.split(spotdl_command)
    else:
        command = spotdl_command

    # Run spotdl command with the subfolder as working directory. Using cwd=
    # instead of os.chdir keeps the working directory of this script intact.
    completed = subprocess.run(command, cwd=subfolder_path)
    if completed.returncode != 0:
        print(f"Command for '{subfolder_name}' exited with code {completed.returncode}")
    print(f"Files downloaded to: {subfolder_path}")
def create_album_playlists(output_directory, subfolder_name):
    """Write a ``<folder>.m3u`` playlist into every folder that holds .mp3 files.

    Walks ``output_directory/subfolder_name`` recursively. Each directory
    found below it that directly contains .mp3 files receives a playlist
    named after the directory, listing the file names one per line.

    Args:
        output_directory: Root folder for all downloads.
        subfolder_name: Subfolder whose sub-directories are scanned.
    """
    print(f"\nCreating playlists for '{subfolder_name}'...")
    playlists_folder = os.path.join(output_directory, subfolder_name)
    # Iterate through subfolders in the specified directory
    for root, dirs, _files in os.walk(playlists_folder):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            # Sorted so the playlist order is stable; os.listdir gives no
            # ordering guarantee.
            mp3_files = sorted(
                name for name in os.listdir(folder_path)
                if name.lower().endswith(".mp3")
            )
            # Only create .m3u file if there are .mp3 files in the subfolder
            if not mp3_files:
                print(f"No .mp3 files found in folder '{folder}', skipping playlist creation.")
                continue
            playlist_path = os.path.join(folder_path, f"{folder}.m3u")
            # Write the .m3u playlist file
            with open(playlist_path, 'w', encoding='utf-8') as playlist_file:
                playlist_file.writelines(f"{mp3_file}\n" for mp3_file in mp3_files)
            print(f"Playlist created for folder '{folder}' at: {playlist_path}")
def find_and_create_missing_file(output_directory):
    """Collect every ``*ERR.txt`` log below *output_directory* into one report.

    The report is written to ``output_directory/1_missing.txt`` (overwriting
    any previous report). For each error log found, its full path is written
    on one line followed by the log's content.

    Args:
        output_directory: Root folder that is searched recursively.
    """
    print("\nFinding and creating missing.txt file...")
    # Open or create the missing.txt file
    missing_path = os.path.join(output_directory, '1_missing.txt')
    with open(missing_path, 'w', encoding='utf-8') as missing_file:
        # Iterate through all files in the specified directory and its subdirectories
        for root, _dirs, files in os.walk(output_directory):
            for file in files:
                # Check if the file ends with ERR.txt
                if not file.endswith('ERR.txt'):
                    continue
                full_path = os.path.join(root, file)
                # Extract content from the current file; undecodable bytes
                # are dropped rather than aborting the whole report.
                with open(full_path, 'r', encoding='utf-8', errors='ignore') as current_file:
                    content = current_file.read()
                # Write full path and content to the missing.txt file
                missing_file.write(f"{full_path}\n{content}\n")
    print("missing.txt file created.")
def delete_long_songs(output_directory, max_minutes=59):
    """Delete every .mp3 below *output_directory* that is longer than the limit.

    The full path of each deleted file is recorded in
    ``output_directory/deletedERR.txt``, which is rewritten on every run.
    Files that cannot be read or removed are reported and skipped.

    Args:
        output_directory: Root folder that is searched recursively.
        max_minutes: Duration limit in minutes; files strictly longer than
            this are deleted. Defaults to 59.
    """
    print(f"\nDeleting songs longer than {max_minutes} minutes...")
    # Open or create the deletedERR.txt file
    deleted_log_path = os.path.join(output_directory, 'deletedERR.txt')
    with open(deleted_log_path, 'w', encoding='utf-8') as deleted_file:
        # Iterate through all files in the specified directory and its subdirectories
        for root, _dirs, files in os.walk(output_directory):
            for file in files:
                # Check if the file ends with .mp3
                if not file.endswith('.mp3'):
                    continue
                full_path = os.path.join(root, file)
                try:
                    # Load the audio file using pydub
                    audio = AudioSegment.from_file(full_path)
                    # len() of an AudioSegment is its duration in milliseconds
                    duration_minutes = len(audio) / (60 * 1000)
                    # Delete the file if duration is longer than the limit
                    if duration_minutes > max_minutes:
                        os.remove(full_path)
                        deleted_file.write(f"{full_path}\n")
                        print(f"Deleted: {file}")
                except Exception as e:
                    # Best effort: one unreadable file must not stop the sweep.
                    print(f"Error processing '{file}': {e}")
    print("Deletion of long songs completed.")
# Each task is independent and can be commented out.

# Link to the Discover Weekly playlist. Set it here manually if you comment
# out Task 0 (the API lookup).
weekly_url = ""
# List of playlists to download. Set it here manually if you comment out
# Task 0 (the API lookup).
playlists_list = ""

# Available variables for file names: {title}, {artists}, {artist}, {album},
# {album-artist}, {genre}, {disc-number}, {disc-count}, {duration}, {year},
# {original-date}, {track-number}, {tracks-count}, {isrc}, {track-id},
# {publisher}, {list-length}, {list-position}, {list-name}, {output-ext}

# Task 0: build playlist list
playlists_list, weekly_url = populate_playlists_and_weekly_url(SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET, SPOTIPY_REDIRECT_URI, SPOTIPY_USERNAME)
print("Generated playlists_list (excluding Discover Weekly):")
print(playlists_list)
print("Generated weekly_url:")
print(weekly_url)

# Task 1: liked_songs
subfolder_name_1 = "1_likedsongs"
formatted_output_1 = '{artists} - {title}.{output-ext}'
spotdl_command_1 = f'spotdl sync saved --format mp3 --sync-without-deleting --user-auth --playlist-numbering --save-errors likedsongsERR.txt --save-file likedsongs.spotdl --output "{formatted_output_1}" --m3u Liked_Songs.m3u '
run_spotdl(output_directory, subfolder_name_1, spotdl_command_1)

# Task 2: Discover Weekly playlist with week number and year
# Format the ISO week number; tuple unpacking works on every Python 3 version.
iso_year, iso_week, _iso_weekday = datetime.date.today().isocalendar()
formatted_week_and_year = f"Week_{iso_week}_{iso_year}"
print("Formatted Week and Year:", formatted_week_and_year)
subfolder_name_2 = os.path.join("1_discover_weekly", formatted_week_and_year)
formatted_output_2 = '{artists} - {title}.{output-ext}'
if weekly_url:
    spotdl_command_2 = f'spotdl sync {weekly_url} --format mp3 --user-auth --playlist-numbering --save-errors discover_weeklyERR.txt --save-file discover_weekly.spotdl --output "{formatted_output_2}" --m3u {formatted_week_and_year}.m3u '
    run_spotdl(output_directory, subfolder_name_2, spotdl_command_2)
else:
    # Without a URL the command would be malformed ("spotdl sync None ...").
    print("No Discover Weekly URL available, skipping Task 2.")

# Task 3: all-user-saved-playlists
subfolder_name_3 = "1_playlists"
formatted_output_3 = '{album}/{artists} - {title}.{output-ext}'
if playlists_list:
    spotdl_command_3 = f'spotdl sync {playlists_list} --format mp3 --sync-without-deleting --user-auth --playlist-numbering --save-errors playlistsERR.txt --save-file playlists.spotdl --output "{formatted_output_3}" '
    run_spotdl(output_directory, subfolder_name_3, spotdl_command_3)
else:
    print("No playlists to download, skipping Task 3.")
# Create playlists for each album in the "playlists" folder;
# --playlist-numbering makes this possible.
create_album_playlists(output_directory, subfolder_name_3)

# Task 4: all-user-followed-artists
subfolder_name_4 = "artists"
formatted_output_4 = '{album-artist}/{album}/{artists} - {title}.{output-ext}'
spotdl_command_4 = f'spotdl sync all-user-followed-artists --format mp3 --sync-without-deleting --user-auth --save-errors artistsERR.txt --save-file artists.spotdl --output "{formatted_output_4}" '
run_spotdl(output_directory, subfolder_name_4, spotdl_command_4)

# Task 5: all-user-saved-albums
subfolder_name_5 = "artists"
formatted_output_5 = '{album-artist}/{album}/{artists} - {title}.{output-ext}'
spotdl_command_5 = f'spotdl sync all-user-saved-albums --format mp3 --sync-without-deleting --user-auth --save-errors albumsERR.txt --save-file albums.spotdl --output "{formatted_output_5}" '
run_spotdl(output_directory, subfolder_name_5, spotdl_command_5)

# Task 6: delete fuckups and compile error logs
# NOTE(review): the calls for this task were missing; these two helpers are
# the only ones matching the task description - confirm.
delete_long_songs(output_directory)
find_and_create_missing_file(output_directory)
import os
import shutil
from mutagen.easyid3 import EasyID3
def move_mp3_to_album_folders(src_folder, dest_folder):
    """Move each .mp3 in *src_folder* into ``dest_folder/<album tag>``.

    Only files directly inside *src_folder* are considered. Files without an
    album tag are left where they are. Failures for a single file are
    reported and do not stop the run.

    Args:
        src_folder: Folder holding the .mp3 files to sort.
        dest_folder: Folder that receives one sub-folder per album.
    """
    # Characters that are not allowed in Windows folder names.
    invalid_chars = str.maketrans({char: '_' for char in '<>:"/\\|?*'})
    for filename in os.listdir(src_folder):
        if not filename.endswith('.mp3'):
            continue
        file_path = os.path.join(src_folder, filename)
        album_name = get_album_name(file_path)
        if not album_name:
            continue
        # Remove or replace invalid characters in album_name; trailing spaces
        # and dots are also trimmed because Windows rejects them.
        album_name = album_name.translate(invalid_chars).rstrip(' .') or '_'
        album_folder = os.path.join(dest_folder, album_name)
        try:
            os.makedirs(album_folder, exist_ok=True)
            dest_path = os.path.join(album_folder, filename)
            shutil.move(file_path, dest_path)
            print(f"Moved {filename} to {album_folder}")
        except Exception as e:
            print(f"Error moving {filename} to {album_folder}: {e}")
def get_album_name(file_path):
    """Return the album tag of the .mp3 at *file_path*.

    Returns:
        The first value of the ``album`` tag, or ``None`` when the file has
        no such tag or its metadata cannot be read (the error is printed).
    """
    try:
        audiofile = EasyID3(file_path)
        if 'album' in audiofile:
            return audiofile['album'][0]
    except Exception as e:
        # Best effort: an unreadable file is reported and treated as untagged.
        print(f"Error reading metadata for {file_path}: {e}")
    return None
if __name__ == "__main__":
    # Fill in both folders before running the script.
    source_folder = r""
    destination_folder = r""
    # An empty placeholder would otherwise fail inside os.listdir('').
    if not source_folder or not destination_folder:
        raise SystemExit("Set source_folder and destination_folder before running this script.")
    move_mp3_to_album_folders(source_folder, destination_folder)
import os
import shutil
from mutagen.easyid3 import EasyID3
def organize_music_by_artist(src_folder, dest_folder):
    """Move every .mp3 below *src_folder* into ``dest_folder/<artist>/<album>``.

    *src_folder* is searched recursively. Files missing either the album
    artist or the album tag are left in place. Failures for a single file
    are reported and do not stop the run.

    Args:
        src_folder: Folder tree holding the .mp3 files to sort.
        dest_folder: Folder that receives the artist/album hierarchy.
    """
    # Characters that are not allowed in Windows folder names.
    invalid_chars = str.maketrans({char: '_' for char in '<>:"/\\|?*'})

    def safe_name(tag_value):
        """Turn a tag value into a usable folder name."""
        return tag_value.translate(invalid_chars).rstrip(' .') or '_'

    for root, _dirs, files in os.walk(src_folder):
        for filename in files:
            if not filename.endswith('.mp3'):
                continue
            file_path = os.path.join(root, filename)
            album_artist, album = get_album_info(file_path)
            if not (album_artist and album):
                continue
            artist_folder = os.path.join(dest_folder, safe_name(album_artist))
            album_folder = os.path.join(artist_folder, safe_name(album))
            try:
                os.makedirs(album_folder, exist_ok=True)
                dest_path = os.path.join(album_folder, filename)
                shutil.move(file_path, dest_path)
                print(f"Moved {filename} to {album_folder}")
            except Exception as e:
                print(f"Error moving {filename} to {album_folder}: {e}")
def get_album_info(file_path):
    """Return the ``(album_artist, album)`` tags of the .mp3 at *file_path*.

    Returns:
        A tuple with the first value of the ``albumartist`` and ``album``
        tags. A missing tag yields ``None`` in its position; when the
        metadata cannot be read at all the error is printed and
        ``(None, None)`` is returned.
    """
    try:
        audiofile = EasyID3(file_path)
        album_artist = audiofile.get('albumartist', [None])[0]
        album = audiofile.get('album', [None])[0]
        return album_artist, album
    except Exception as e:
        # Best effort: an unreadable file is reported and treated as untagged.
        print(f"Error reading metadata for {file_path}: {e}")
        return None, None
if __name__ == "__main__":
    # Fill in both folders before running the script.
    source_folder = r""
    destination_folder = r""
    # An empty placeholder would otherwise make os.walk('') silently do nothing.
    if not source_folder or not destination_folder:
        raise SystemExit("Set source_folder and destination_folder before running this script.")
    organize_music_by_artist(source_folder, destination_folder)

Spotify Music Ripper

I created this tool because Lidarr wasn't working for me. I use Spotify as a dashboard to control the downloads, the .py script to download the music, and Navidrome to listen to it.

All of which seems to be the least problematic FOSS/self-hosted/homelab system for unconventional music tastes.


Downloads liked songs, all saved playlists, saved albums, followed artists and the current week's Discover Weekly, and keeps them organised for long-term storage. Generates .m3u files for Navidrome/Jellyfin and lists missing items in a .txt file.

Current State:



  • Determine how to handle missing.txt to automate downloads.
  • Investigate why it doesn't run in headless mode—possibly requires API tokens.
  • Find a solution for issue 1970 regarding 10h/1h files.
  • Until issue 2000 is fixed, --m3u {list}.m3u is not added to playlist downloads. Once it is fixed, the playlist-creation functions can be removed. (Would it play nicely with subfolders?)
  • If this issue is resolved, remove the API scraper and set the Discover Weekly URL manually.
  • Troubleshoot --sponsor-block: find out why it causes an ffmpeg issue. In any case, it would be nice to have it enabled by default everywhere.
  • automate the variables
  • Set up the requirements in the script.

How to use:


Fill in the variables and run the script.

import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from datetime import datetime
# Set your Spotify API credentials. They are read from the environment when
# available; otherwise replace the placeholders by hand.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'xxxxxxxxxx')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'xxxxxxxxxx')
SPOTIPY_REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI', 'https://xxxxxxxxxx')
# Set your Spotify username
SPOTIPY_USERNAME = 'xxxxxxxxxx'

# Specify the folder to store text files
OUTPUT_FOLDER = 'spotifyrippingmachine'
# Create the folder if it doesn't exist
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Create a separate folder for Discover Weekly
DISCOVER_WEEKLY_FOLDER = os.path.join(OUTPUT_FOLDER, 'Discover Weekly')
os.makedirs(DISCOVER_WEEKLY_FOLDER, exist_ok=True)

# Scope for necessary permissions
SCOPE = 'user-library-read playlist-read-private user-follow-read'

# Remove token cache so that every run authenticates from scratch
token_cache_path = os.path.join(OUTPUT_FOLDER, '.cache-username')
if os.path.exists(token_cache_path):
    os.remove(token_cache_path)

# Authenticate using SpotifyOAuth.
# NOTE(review): every argument after client_id was lost from the original
# call; cache_path is assumed to be the file removed above - confirm.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
    redirect_uri=SPOTIPY_REDIRECT_URI,
    scope=SCOPE,
    username=SPOTIPY_USERNAME,
    cache_path=token_cache_path,
))
def get_names_and_urls(items):
    """Return ``(name, spotify_url)`` pairs for Spotify API objects.

    Args:
        items: Iterable of dicts carrying ``name`` and
            ``external_urls['spotify']`` (playlists, tracks, albums,
            artists). ``None`` entries and entries without a Spotify URL are
            skipped, since there is nothing to download for them.

    Returns:
        A list of ``(name, url)`` tuples in input order.
    """
    return [
        (item['name'], item['external_urls']['spotify'])
        for item in items
        if item and (item.get('external_urls') or {}).get('spotify')
    ]
def write_to_file(folder, filename, data):
    """Write ``name: url`` lines to ``folder/filename`` (UTF-8, overwriting).

    Args:
        folder: Target folder; created when it does not exist yet.
        filename: Name of the text file inside *folder*.
        data: Iterable of ``(name, url)`` pairs, written one per line.
    """
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, filename)
    with open(filepath, 'w', encoding='utf-8') as file:
        file.writelines(f"{name}: {url}\n" for name, url in data)
def get_all_items(api_call):
    """Collect the items of every page of a paginated Spotify API call.

    Args:
        api_call: Zero-argument callable returning the first page. The page
            is either a paging dict (``items`` / ``next``) or a dict that
            wraps the paging dict under ``artists``.

    Returns:
        A list with the items of all pages. A response that is not a dict
        (for example ``None``) yields an empty list.
    """
    all_items = []
    response = api_call()
    while isinstance(response, dict):
        # The followed-artists endpoint nests its paging object under
        # 'artists'; the other endpoints return the paging object directly.
        page = response.get('artists', response)
        if not isinstance(page, dict):
            break
        all_items.extend(page.get('items') or [])
        if not page.get('next'):
            break
        # Uses the module-level Spotipy client to fetch the following page.
        response = sp.next(page)
    return all_items
# Year and week tag used for the Discover Weekly file name. Computed up front
# so it is defined even when the user has no Discover Weekly playlist.
current_date = datetime.now().strftime('%Y-%W')
discover_weekly_written = False

# Get and write playlists
playlists = get_all_items(lambda: sp.current_user_playlists())
for playlist in playlists:
    if playlist['name'] == 'Discover Weekly':
        # Write the songs and links to a file for the current week in the
        # Discover Weekly folder. Entries whose track is missing are skipped.
        tracks = get_all_items(lambda: sp.playlist_tracks(playlist['id']))
        track_data = get_names_and_urls([track['track'] for track in tracks if track.get('track')])
        write_to_file(DISCOVER_WEEKLY_FOLDER, f'{current_date}.txt', track_data)
        discover_weekly_written = True

# The playlist overview is written once, after the loop.
playlist_data = get_names_and_urls(playlists)
write_to_file(OUTPUT_FOLDER, 'playlists.txt', playlist_data)

# Get and write liked songs
liked_songs = get_all_items(lambda: sp.current_user_saved_tracks())
liked_songs_data = get_names_and_urls([track['track'] for track in liked_songs if track.get('track')])
write_to_file(OUTPUT_FOLDER, 'likedsongs.txt', liked_songs_data)

# Get and write albums
albums = get_all_items(lambda: sp.current_user_saved_albums())
album_data = get_names_and_urls([album['album'] for album in albums if album.get('album')])
write_to_file(OUTPUT_FOLDER, 'albums.txt', album_data)

# Get and write artists
artists = get_all_items(lambda: sp.current_user_followed_artists())
artist_data = get_names_and_urls(artists)
write_to_file(OUTPUT_FOLDER, 'artists.txt', artist_data)

print(f"Data has been written to {OUTPUT_FOLDER}/playlists.txt, {OUTPUT_FOLDER}/albums.txt, {OUTPUT_FOLDER}/artists.txt, and {OUTPUT_FOLDER}/likedsongs.txt.")
if discover_weekly_written:
    print(f"Discover Weekly data has been written to {DISCOVER_WEEKLY_FOLDER}/{current_date}.txt.")
else:
    print("No Discover Weekly playlist found; nothing was written for it.")
import os
import subprocess
from datetime import datetime
# Characters that are not allowed in Windows folder names.
_INVALID_NAME_CHARS = str.maketrans({char: '_' for char in '<>:"/\\|?*'})


def _safe_folder_name(name):
    """Replace characters that are invalid in folder names and trim trailing spaces."""
    return name.translate(_INVALID_NAME_CHARS).rstrip() or '_'


def _urls_from_lines(lines):
    """Return the URL part of every non-empty ``name: url`` line."""
    urls = (line.strip().rsplit(': ', 1)[-1] for line in lines)
    return [url for url in urls if url]


def _run_spotdl_download(urls, target_dir):
    """Run ``spotdl download`` for *urls* with *target_dir* as working directory."""
    if not urls:
        return
    try:
        # cwd= replaces the chdir/chdir-back pair, so a failure cannot leave
        # the script stranded in the wrong directory.
        subprocess.run(['spotdl', 'download', *urls], cwd=target_dir)
    except OSError as error:
        print(f"Error: could not run spotdl in {target_dir}: {error}")


def _write_m3u(playlist_path, subfolder_path):
    """Write the names of the .mp3 files in *subfolder_path* to *playlist_path*."""
    try:
        # Include only .mp3 files in the playlist
        mp3_files = sorted(
            name for name in os.listdir(subfolder_path)
            if name.lower().endswith('.mp3')
            and os.path.isfile(os.path.join(subfolder_path, name))
        )
        with open(playlist_path, 'w', encoding='utf-8-sig') as playlist_file:
            for mp3_file in mp3_files:
                # Write only the name of the .mp3 file to the playlist
                playlist_file.write(f"{mp3_file}\n")
                print(f"Imported: {os.path.join(subfolder_path, mp3_file)}")
    except FileNotFoundError:
        print(f"Error: Could not find files in the subfolder: {subfolder_path}")


def create_folders(input_dir, output_dir):
    """Download everything listed in the ``*.txt`` files of *input_dir*.

    Each text file holds ``name: url`` lines and gets a folder named after
    it inside *output_dir*:

    * ``likedsongs.txt`` and ``playlist.txt`` download all their URLs into
      that single folder and get one ``<file name>.m3u`` playlist there
      (``playlist.txt`` ignores lines starting with ``Discover Weekly:``).
    * Any other file creates one sub-folder per line, named after the
      line's name part, and downloads the line's URL into it. Unless the
      file is ``albums.txt`` or ``artists.txt`` a playlist is written into
      each sub-folder as well.

    Playlists list bare file names, so they are stored next to the files
    they refer to. A ``log<timestamp>.txt`` file is created in *output_dir*.

    Args:
        input_dir: Folder holding the ``*.txt`` files.
        output_dir: Folder receiving downloads, playlists and the log file.
    """
    # Get the current date and time for the log file name
    current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
    log_filename = f"log{current_datetime}.txt"
    log_path = os.path.join(output_dir, log_filename)
    os.makedirs(output_dir, exist_ok=True)

    # List all files in the input directory
    files = [f for f in os.listdir(input_dir) if f.endswith('.txt')]

    with open(log_path, 'w', encoding='utf-8') as log_file:
        log_file.write(f"Execution started at: {current_datetime}\n")

        for file in files:
            file_path = os.path.join(input_dir, file)
            # Create a folder with the name of the file
            folder_name = os.path.splitext(file)[0]
            folder_path = os.path.join(output_dir, folder_name)
            os.makedirs(folder_path, exist_ok=True)

            # Remove existing .m3u files in the folder
            for entry in os.listdir(folder_path):
                if entry.endswith('.m3u'):
                    os.remove(os.path.join(folder_path, entry))

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # NOTE(review): the exporter script in this gist writes
            # 'playlists.txt' (plural), which does not match 'playlist.txt'
            # here and therefore takes the per-line branch below - confirm
            # which behavior is intended.
            if file in ('likedsongs.txt', 'playlist.txt'):
                if file == 'playlist.txt':
                    # Filter out lines starting with 'Discover Weekly:'
                    lines = [line for line in lines if not line.startswith('Discover Weekly:')]
                # For these two files the download sub-folder carries the
                # same name as the file, i.e. it is folder_path itself.
                _run_spotdl_download(_urls_from_lines(lines), folder_path)
                _write_m3u(os.path.join(folder_path, f"{folder_name}.m3u"), folder_path)
                continue

            # Create a subfolder with the first part of each line
            for line in lines:
                line_parts = line.strip().rsplit(': ', 1)
                # Check if the line has the required number of elements
                if len(line_parts) != 2:
                    print(f"Skipping invalid line: {line}")
                    continue
                # Sanitise the name before the folder is created so the
                # path on disk matches the cleaned name.
                subfolder_name = _safe_folder_name(line_parts[0])
                subfolder_path = os.path.join(folder_path, subfolder_name)
                os.makedirs(subfolder_path, exist_ok=True)

                # Run spotdl on the URL
                _run_spotdl_download([line_parts[1]], subfolder_path)

                # Check if the folder is neither "albums" nor "artists"
                # before creating the .m3u playlist inside the subfolder
                if folder_name.lower() not in ['albums', 'artists']:
                    _write_m3u(os.path.join(subfolder_path, f"{folder_name}.m3u"), subfolder_path)

    print(f"Processing complete. Log file created at: {log_path}")
if __name__ == "__main__":
    # Set input and output directories
    input_directory = r"XXXXXXXXXXX"
    output_directory = r"XXXXXXX"
    # Fail with a clear message while the placeholder above is still in place.
    if not os.path.isdir(input_directory):
        raise SystemExit(f"Input directory not found: {input_directory}")
    create_folders(input_directory, output_directory)
does this currently work?

i made a more formal repo with more features there and a couple more updates.

still in its infancy until i finish the automation part.
But I have to say, since it works in its current state without tinkering, the motivation to move forward is quite low :D
if the userbase grows maybe i will find the will to get it to a state more feature complete

