Skip to content

Instantly share code, notes, and snippets.

@sangrepura
Created September 23, 2025 00:29
Show Gist options
  • Save sangrepura/240233decd9986ef9e13ad4b95079f49 to your computer and use it in GitHub Desktop.
Save sangrepura/240233decd9986ef9e13ad4b95079f49 to your computer and use it in GitHub Desktop.
A YouTube audio downloader script that supports playlist URLs and downloads every track in the highest available quality
#!/usr/bin/env python3
"""
YouTube Audio Downloader Script
Downloads best quality audio from URLs listed in a file using yt-dlp
PREREQUISITES:
=============
Ubuntu/Debian:
--------------
sudo apt update
sudo apt install python3 python3-pip ffmpeg
pip3 install yt-dlp mutagen
Arch Linux:
-----------
sudo pacman -S python python-pip ffmpeg
pip install yt-dlp mutagen
# Alternative using AUR (optional):
# yay -S yt-dlp python-mutagen
Other Systems:
--------------
1. Install Python 3.6+ and pip
2. Install FFmpeg from https://ffmpeg.org/download.html
3. Run: pip install yt-dlp mutagen
Required Dependencies:
- Python 3.6+
- yt-dlp (for downloading)
- mutagen (for metadata handling)
- ffmpeg (for audio conversion)
"""
import argparse
import os
import sys
import subprocess
import json
from pathlib import Path
def check_dependencies():
    """Check whether all required dependencies are installed.

    Each external tool is probed by running its version command; the
    ``mutagen`` package is probed by importing it.

    Returns:
        list: Names of the missing dependencies, in the order ``'yt-dlp'``,
        ``'ffmpeg'``, ``'python-mutagen'``. Empty when everything is present.
    """
    missing = []

    # (executable, flag that prints its version) for every external tool.
    tool_probes = (
        ('yt-dlp', '--version'),
        ('ffmpeg', '-version'),
    )
    for tool, version_flag in tool_probes:
        try:
            subprocess.run([tool, version_flag],
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL,
                           check=True)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers FileNotFoundError (not on PATH) as well as
            # PermissionError and similar launch failures, so an unusable
            # binary is reported as missing instead of crashing the check.
            missing.append(tool)

    # Check mutagen
    try:
        import mutagen  # noqa: F401 -- imported only to test availability
    except ImportError:
        missing.append('python-mutagen')

    return missing
def print_installation_instructions(missing_deps):
    """Print the missing dependency names followed by per-platform install steps.

    Args:
        missing_deps: Iterable of dependency names (strings) to report.
    """
    report = [
        f"Missing dependencies: {', '.join(missing_deps)}",
        "\nInstallation instructions:",
        "=" * 50,
        # Debian family
        "\nUbuntu/Debian:",
        "sudo apt update",
        "sudo apt install python3 python3-pip ffmpeg",
        "pip3 install yt-dlp mutagen",
        # Arch
        "\nArch Linux:",
        "sudo pacman -S python python-pip ffmpeg",
        "pip install yt-dlp mutagen",
        "# Alternative: yay -S yt-dlp python-mutagen",
        # Everything else
        "\nOther systems:",
        "1. Install Python 3.6+ and pip",
        "2. Install FFmpeg from https://ffmpeg.org/download.html",
        "3. Run: pip install yt-dlp mutagen",
    ]
    # One print of the joined lines emits the same text as one print per line.
    print("\n".join(report))
def check_ytdlp_installed():
    """Check if yt-dlp is installed and accessible (legacy function).

    Returns:
        bool: True when ``yt-dlp --version`` runs and exits with status 0,
        False when it cannot be launched or exits with an error.
    """
    try:
        subprocess.run(['yt-dlp', '--version'],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError plus other launch failures such as
        # PermissionError, which would otherwise surface as a traceback.
        return False
    return True
def read_urls_from_file(file_path):
    """Read URLs from a file, filtering out empty lines and comments.

    Args:
        file_path: Path of a text file with one URL per line. Lines whose
            first non-blank character is ``#`` are treated as comments.

    Returns:
        list: ``(url, line_number)`` tuples in file order, where
        ``line_number`` is the 1-based line the URL was read from.

    Exits the process with status 1 (after printing a message) when the file
    is missing or cannot be read or decoded.
    """
    urls = []
    try:
        # 'utf-8-sig' drops a leading byte-order mark if one is present.
        # With plain 'utf-8' the BOM stays glued to the first line, which
        # corrupts a first-line URL or hides a first-line '#' comment.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                # Skip empty lines and comments
                if line and not line.startswith('#'):
                    urls.append((line, line_num))
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except (OSError, UnicodeError) as e:
        # Unreadable path (permissions, directory, ...) or undecodable bytes.
        print(f"Error reading file '{file_path}': {e}")
        sys.exit(1)
    return urls
def is_playlist_url(url):
    """Check if a URL appears to be a playlist.

    The check is a case-insensitive heuristic on the URL text only; nothing
    is fetched.

    Args:
        url: URL string to inspect.

    Returns:
        bool: True when the URL contains one of the playlist markers below,
        or is a ``watch?v=...`` URL that also carries a ``&list=`` parameter.
    """
    lowered = url.lower()

    playlist_indicators = (
        'playlist?list=',
        '/sets/',  # SoundCloud
        '/albums/',
        'playlist',
        'album',
    )
    if any(indicator in lowered for indicator in playlist_indicators):
        return True

    # "Mixed" URLs: a single video opened inside a playlist. The earlier
    # marker 'watch?v=.*&list=' was a regex used as a plain substring, so it
    # could never match; test the two halves explicitly instead.
    _, watch_marker, remainder = lowered.partition('watch?v=')
    return bool(watch_marker) and '&list=' in remainder
def extract_playlist_urls(playlist_url):
    """Extract individual video URLs from a playlist.

    Runs yt-dlp in flat-playlist mode (nothing is downloaded) and collects
    the webpage URL it prints for each entry.

    Args:
        playlist_url: URL of the playlist to expand.

    Returns:
        list: Entry URLs in the order yt-dlp printed them; empty when yt-dlp
        could not be run or produced no URLs.
    """
    print(f"Extracting URLs from playlist: {playlist_url}")
    cmd = [
        'yt-dlp',
        '--flat-playlist',  # Don't download, just extract URLs
        '--print', 'webpage_url',  # Print the webpage URL of each video
        '--ignore-errors',  # Continue on errors
        # End of options: the URL comes from a user-supplied file, so a value
        # starting with '-' must never be parsed as a yt-dlp option.
        '--',
        playlist_url,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: yt-dlp could not be launched; ValueError: an argument or
        # the captured output could not be encoded/decoded.
        print(f" Exception while extracting playlist: {e}")
        return []

    urls = [line.strip() for line in result.stdout.splitlines() if line.strip()]

    if result.returncode != 0:
        if not urls:
            print(" Failed to extract playlist URLs")
            if result.stderr:
                print(f" Error: {result.stderr.strip()}")
            return []
        # --ignore-errors asks yt-dlp to keep going past bad entries, so a
        # non-zero exit can still come with usable output. Keep what was
        # printed rather than discarding the whole playlist.
        print(" Warning: yt-dlp reported errors; using the URLs it did return")
        if result.stderr:
            print(f" Error: {result.stderr.strip()}")

    print(f" Found {len(urls)} videos in playlist")
    return urls
def expand_urls(urls_with_line_nums):
    """Replace every playlist URL with the URLs of its individual entries.

    Args:
        urls_with_line_nums: Iterable of ``(url, line_number)`` pairs.

    Returns:
        list: ``(url, label)`` pairs. Non-playlist URLs keep their original
        line number as the label. Playlist entries are labelled
        ``"<line>.<index>"`` with a 1-based index. Playlists that yield no
        URLs are dropped.
    """
    flattened = []
    for source_url, source_line in urls_with_line_nums:
        # Plain URLs pass straight through.
        if not is_playlist_url(source_url):
            flattened.append((source_url, source_line))
            continue

        print(f"[{source_line}] Detected playlist URL")
        entries = extract_playlist_urls(source_url)
        if not entries:
            print(f"[{source_line}] No URLs extracted from playlist, skipping")
            continue

        # Compound label keeps the originating line visible for each entry.
        flattened.extend(
            (entry_url, f"{source_line}.{position}")
            for position, entry_url in enumerate(entries, 1)
        )
    return flattened
def download_audio(url, output_dir, line_num, add_metadata=True, write_info_json=True):
    """Download best quality audio from a single URL.

    Args:
        url: URL to hand to yt-dlp.
        output_dir: Directory the audio file is written into.
        line_num: Label used as the ``[...]`` prefix of progress messages.
        add_metadata: When true, pass ``--add-metadata`` and
            ``--embed-thumbnail`` to yt-dlp.
        write_info_json: When true, pass ``--write-info-json`` to yt-dlp.

    Returns:
        bool: True when yt-dlp exits with status 0, False when it exits
        non-zero or cannot be run.
    """
    print(f"[{line_num}] Downloading: {url}")

    # The directory becomes part of a yt-dlp output template, where '%' opens
    # a field reference; double it so a literal '%' in the path survives.
    template_dir = str(output_dir).replace('%', '%%')
    # Custom output template for consistent naming: song - artist - album - year
    output_template = f'{template_dir}/%(title)s - %(artist,uploader)s - %(album)s - %(release_year,upload_date>%Y)s.%(ext)s'

    # yt-dlp command with best audio quality options
    cmd = [
        'yt-dlp',
        '--extract-audio',  # Extract audio only
        '--audio-format', 'best',  # Keep best available audio format
        '--audio-quality', '0',  # Best quality (0 = best, 10 = worst)
        '--output', output_template,  # Custom filename template
        '--no-playlist',  # Download single video even if URL is playlist
        '--ignore-errors',  # Continue on download errors
    ]
    # Add optional metadata features
    if add_metadata:
        cmd.extend([
            '--add-metadata',  # Add metadata to audio file
            '--embed-thumbnail',  # Embed thumbnail as cover art (if possible)
        ])
    if write_info_json:
        cmd.append('--write-info-json')  # Write metadata to .info.json file

    # End of options, then the URL last: the URL comes from a user-supplied
    # file, so a value starting with '-' must never be parsed as an option.
    cmd.extend(['--', url])

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: yt-dlp could not be launched; ValueError: an argument or
        # the captured output could not be encoded/decoded.
        print(f"[{line_num}] ✗ Exception while downloading {url}: {e}")
        return False

    if result.returncode == 0:
        print(f"[{line_num}] ✓ Success: {url}")
        return True

    print(f"[{line_num}] ✗ Failed: {url}")
    if result.stderr:
        print(f" Error: {result.stderr.strip()}")
    return False
def sanitize_filename_template():
    """Return the yt-dlp output template (without extension) used for naming.

    The result has the shape "title - artist - album - year".
    """
    name_fields = (
        '%(title)s',
        # Artist, falling back to the uploader when no artist is set.
        '%(artist,uploader)s',
        # Album has no fallback here.
        # NOTE(review): yt-dlp presumably fills in its own placeholder when
        # the field is missing -- confirm against the yt-dlp documentation.
        '%(album)s',
        # Release year, falling back to the year part of the upload date.
        '%(release_year,upload_date>%Y)s',
    )
    return ' - '.join(name_fields)
def main():
    """Command-line entry point: read a URL list and download each entry as audio.

    Steps: parse arguments, verify dependencies, create the output directory,
    read the URL file, optionally expand playlists, download every URL and
    print a summary.

    Exits with status 1 when dependencies are missing, the output directory
    cannot be created, there is nothing to download, or any download failed.
    argparse itself exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='Download best quality audio from URLs using yt-dlp (supports playlists)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s -f urls.txt -d ./downloads
%(prog)s --file video_urls.txt --dir /home/user/music --no-metadata
File format:
One URL per line. Lines starting with # are treated as comments.
Empty lines are ignored.
Supports both individual video URLs and playlist URLs:
- Individual: https://www.youtube.com/watch?v=dQw4w9WgXcQ
- Playlist: https://www.youtube.com/playlist?list=PLxyz...
- Mixed: https://www.youtube.com/watch?v=abc&list=PLxyz...
Playlist URLs will be automatically expanded to individual video URLs.
Output naming:
Files are saved as: "Song Title - Artist - Album - Year.ext"
- Uses artist metadata if available, otherwise uploader name
- Uses album metadata if available, otherwise yt-dlp's "NA" placeholder
- Uses release year if available, otherwise upload year
Example: "Bohemian Rhapsody - Queen - A Night at the Opera - 1975.m4a"
"""
    )
    parser.add_argument('-f', '--file',
                        required=True,
                        help='File containing URLs (one per line)')
    parser.add_argument('-d', '--dir',
                        required=True,
                        help='Output directory for downloaded audio files')
    parser.add_argument('--no-playlist-expansion',
                        action='store_true',
                        help='Disable automatic playlist expansion (download playlists as-is)')
    parser.add_argument('--no-metadata',
                        action='store_true',
                        help='Skip adding metadata and thumbnail embedding')
    parser.add_argument('--no-info-json',
                        action='store_true',
                        help='Skip writing .info.json metadata files')
    args = parser.parse_args()

    # Check if all dependencies are installed
    missing_deps = check_dependencies()
    if missing_deps:
        print("Error: Missing required dependencies.")
        print_installation_instructions(missing_deps)
        sys.exit(1)

    # Create output directory if it doesn't exist
    output_dir = Path(args.dir)
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory '{args.dir}': {e}")
        sys.exit(1)

    # Read URLs from file
    urls = read_urls_from_file(args.file)
    if not urls:
        print(f"No URLs found in '{args.file}'")
        sys.exit(1)
    print(f"Found {len(urls)} URLs in file")

    # Expand playlist URLs to individual videos (unless disabled)
    if not args.no_playlist_expansion:
        print("Checking for playlists and expanding...")
        print("-" * 50)
        urls = expand_urls(urls)
        if not urls:
            # Every entry was a playlist that yielded nothing. Without this
            # guard the run would report "Download completed!" and exit 0
            # although nothing was downloaded.
            print("No downloadable URLs remain after playlist expansion")
            sys.exit(1)

    print(f"Total URLs to download: {len(urls)}")
    print(f"Output directory: {args.dir}")
    print("-" * 50)

    # Download each URL
    successful = 0
    failed = 0
    for url, line_num in urls:
        if download_audio(url, args.dir, line_num,
                          add_metadata=not args.no_metadata,
                          write_info_json=not args.no_info_json):
            successful += 1
        else:
            failed += 1
        print()  # Empty line for readability

    # Summary
    print("-" * 50)
    print("Download completed!")
    print(f"Successful: {successful}")
    print(f"Failed: {failed}")
    print(f"Total: {len(urls)}")

    # A non-zero status lets calling scripts detect partial failure.
    if failed > 0:
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment