Last active
August 15, 2025 01:39
-
-
Save ChronoMonochrome/4bbab1933816d0dc3803926d567e9623 to your computer and use it in GitHub Desktop.
Python script to download songs from SoundCloud
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import os
import subprocess
import sys
from urllib.parse import urlsplit

import bs4
import requests
from pathvalidate import sanitize_filename
# Proxy endpoint shared by the HTTP requests and the yt-dlp invocation below.
PROXY = "http://localhost:18080"
# Mapping in the shape `requests` expects: the same proxy for both schemes.
PROXIES = {scheme: PROXY for scheme in ('http', 'https')}
def load_urls(filename):
    """Read a list of URLs from a text file, one URL per line.

    Leading/trailing whitespace is stripped from every line (not just from
    the file as a whole), so stray spaces or tabs never end up inside a URL.
    Blank lines are dropped; an empty file yields an empty list.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]
def get_audio_codec(filename):
    """Return the codec name of the first audio stream in a media file.

    Runs ``ffprobe`` on *filename* and returns its stripped output.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The codec name printed by ffprobe, or ``None`` when ffprobe exits
        with an error or is not installed.
    """
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # errors='replace': tool output is not guaranteed to be valid UTF-8,
        # and a decode failure must not mask the real error.
        print(f"Error getting codec for {filename}: {e.output.decode(errors='replace')}")
        return None
    except FileNotFoundError:
        # Same wording as the yt-dlp check in download().
        print("ffprobe command not found. Is it installed and in your PATH?")
        return None
    return output.decode(errors="replace").strip()
def create_default_artwork_image(filename):
    """Write a small placeholder cover image to *filename*.

    The image is a black 16x16 baseline JPEG (631 bytes) stored inline as
    base64. The string is split by JPEG segment so each part can be checked
    against the standard tables, and the long run of identical bytes in the
    chroma quantization table is written as a repetition rather than as an
    error-prone literal.

    Args:
        filename: Path of the file to create (overwritten if it exists).

    Raises:
        OSError: If the file cannot be written.
    """
    base64_image = "".join((
        # SOI + JFIF APP0 header
        "/9j/4AAQSkZJRgABAQAAAQABAAD/",
        # DQT: luminance quantization table
        "2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIs",
        "IxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/",
        # DQT: chrominance quantization table (ends in 50 identical bytes)
        "2wBDAQkJCQwLDBgNDRgyIRwh",
        "MjIy" * 16,
        "MjL/",
        # SOF0: 8-bit, 16x16 pixels, 3 components
        "wAARCAAQABADASIAAhEBAxEB",
        # DHT: standard luminance DC table
        "/8QAHwAAAQUBAQEBAQEA",
        "AAAAAAAA",
        "AAECAwQFBgcICQoL",
        # DHT: standard luminance AC table
        "/8QAtRAAAgEDAwIEAwUFBAQAAAF9",
        "AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS",
        "0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RF",
        "RkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqD",
        "hIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2",
        "t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo",
        "6erx8vP09fb3+Pn6",
        # DHT: standard chrominance DC table
        "/8QAHwEAAwEBAQEBAQEBAQAA",
        "AAAA",
        "AAECAwQFBgcICQoL",
        # DHT: standard chrominance AC table
        "/8QAtREAAgECBAQDBAcFBAQAAQJ3",
        "AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMz",
        "UvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNE",
        "RUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6",
        "goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0",
        "tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn",
        "6Onq8vP09fb3+Pn6",
        # SOS header, entropy-coded data for one all-black MCU, EOI
        "/9oADAMBAAIRAxEAPwD5/ooooA//2Q==",
    ))
    # validate=True makes a corrupted literal fail loudly instead of
    # silently producing a broken image file.
    image_data = base64.b64decode(base64_image, validate=True)
    with open(filename, 'wb') as f:
        f.write(image_data)
def _fetch_artwork(soup):
    """Return the filename of a cover image for the parsed track page.

    The image referenced by the page's ``twitter:image`` meta tag is saved in
    the current directory under the last path segment of its URL. When the tag
    is missing, the URL yields no usable filename, or the download or write
    fails, the shared ``default_artwork.jpg`` is used instead (created on
    demand).
    """
    default_filename = 'default_artwork.jpg'
    artwork_filename = default_filename

    artwork_img_tag = soup.find('meta', {'property': 'twitter:image'})
    artwork_img_url = artwork_img_tag.get('content') if artwork_img_tag is not None else None
    if artwork_img_url:
        # Name the file after the URL *path* only, so a query string or a
        # trailing slash cannot produce an invalid or empty filename.
        url_path = urlsplit(artwork_img_url).path.rstrip('/')
        candidate = sanitize_filename(url_path.rsplit('/', 1)[-1])
        if candidate:
            artwork_filename = candidate
            if not os.path.exists(artwork_filename):
                try:
                    artwork_img = requests.get(artwork_img_url, proxies=PROXIES, timeout=10)
                    artwork_img.raise_for_status()
                    with open(artwork_filename, 'wb') as f:
                        f.write(artwork_img.content)
                except (requests.exceptions.RequestException, OSError) as e:
                    print(f"Error fetching artwork from {artwork_img_url}: {e}")
                    artwork_filename = default_filename

    if not os.path.exists(artwork_filename):
        create_default_artwork_image(artwork_filename)
    return artwork_filename


def _download_and_embed(url, base_filename, artwork_filename):
    """Fetch *url* as MP3 with yt-dlp and embed the artwork with ffmpeg.

    Produces ``<base_filename>_1.mp3`` and removes the intermediate
    ``<base_filename>.mp3``. Returns True on success, False after printing
    the reason on any failure.
    """
    # yt-dlp converts to mp3 when the source is in another format, so the
    # result is always "<base>.mp3". '%' is template syntax in -o, so literal
    # percent signs coming from the track title must be doubled.
    output_template = base_filename.replace('%', '%%') + ".%(ext)s"
    yt_dlp_command = [
        "yt-dlp",
        "--proxy", PROXY,
        "-f", "bestaudio/best",
        "--audio-format", "mp3",  # Force conversion to mp3
        "--extract-audio",  # Extract audio only
        url,
        "-o", output_template,
    ]
    print("Downloading and converting audio with yt-dlp...")
    try:
        subprocess.run(yt_dlp_command, check=True, timeout=300)
    except subprocess.CalledProcessError as e:
        print(f"yt-dlp command failed with exit code {e.returncode}: {e}")
        return False
    except subprocess.TimeoutExpired:
        print("yt-dlp command timed out.")
        return False
    except FileNotFoundError:
        print("yt-dlp command not found. Is it installed and in your PATH?")
        return False

    soundtrack_filename = f"{base_filename}.mp3"
    if not os.path.exists(soundtrack_filename):
        print(f"yt-dlp did not produce the expected MP3 file: {soundtrack_filename}")
        return False

    out_filename = f"{base_filename}_1.mp3"
    ffmpeg_embed_command = [
        "ffmpeg",
        "-y",  # overwrite a stale output instead of prompting on stdin
        "-i", soundtrack_filename,
        "-i", artwork_filename,
        "-map", "0:0",
        "-map", "1:0",
        "-c", "copy",
        "-id3v2_version", "3",
        # No shell is involved, so the values must not carry quote characters:
        # they would be stored literally in the tags.
        "-metadata:s:v", "title=Album cover",
        "-metadata:s:v", "comment=Cover (front)",
        out_filename,
    ]
    print("Embedding artwork...")
    try:
        subprocess.run(ffmpeg_embed_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"ffmpeg embedding failed with exit code {e.returncode}: {e}")
        # Do not leave a truncated output behind; the plain MP3 is kept.
        if os.path.exists(out_filename):
            os.remove(out_filename)
        return False
    except FileNotFoundError:
        print("ffmpeg command not found. Is it installed and in your PATH?")
        return False

    # The un-tagged MP3 is only an intermediate file.
    os.remove(soundtrack_filename)
    return True


def download(url):
    """Download one SoundCloud track as an MP3 with embedded cover art.

    Fetches the track page through the configured proxy, derives the output
    name ``"<title> [<id>]"`` from its metadata, downloads the audio with
    yt-dlp and embeds the cover image with ffmpeg, producing
    ``"<title> [<id>]_1.mp3"`` in the current directory.

    Every failure is reported with ``print`` and ends the function early;
    nothing is raised for network errors, missing page metadata or failing
    external tools.

    Args:
        url: URL of the SoundCloud track page.

    Returns:
        None.
    """
    print(f"Starting download for {url}")
    try:
        page = requests.get(url, proxies=PROXIES, timeout=10)
        page.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching page {url}: {e}")
        return

    soup = bs4.BeautifulSoup(page.content, 'html.parser')

    # Both tags are needed to build the filename; a page without them
    # (removed track, changed markup) is skipped with a clear message.
    id_tag = soup.find('meta', {'property': 'al:android:url'})
    name_tag = soup.find('a', {'itemprop': 'url'})
    id_content = id_tag.get('content') if id_tag is not None else None
    if not id_content or name_tag is None:
        print(f"Could not find track id/title on page {url}")
        return
    soundtrack_id = id_content.split(':')[-1]
    soundtrack_name = name_tag.text
    base_filename = sanitize_filename(f"{soundtrack_name} [{soundtrack_id}]").replace('\'', '')

    artwork_filename = _fetch_artwork(soup)
    try:
        if _download_and_embed(url, base_filename, artwork_filename):
            print(f"Finished download for {url}")
    finally:
        # Per-track artwork is a temporary ffmpeg input; remove it on every
        # exit path. The shared default image is kept for later tracks.
        if artwork_filename != 'default_artwork.jpg' and os.path.exists(artwork_filename):
            os.remove(artwork_filename)
def main():
    """Download every URL listed in the input file, one track at a time.

    The list file defaults to ``extracted_links.txt``; a different path may
    be given as the first command-line argument. Entries that are empty or do
    not start with ``http`` are reported and skipped. An error in one download
    is reported and does not stop the remaining ones; Ctrl+C stops the run.
    """
    list_path = sys.argv[1] if len(sys.argv) > 1 else 'extracted_links.txt'
    try:
        urls = load_urls(list_path)
    except OSError as e:
        # A missing or unreadable list is a usage problem, not a crash.
        print(f"Could not read URL list {list_path}: {e}")
        sys.exit(1)

    for url in urls:
        if not url or not url.startswith('http'):
            print(f'Incorrect or empty URL: {url}')
            continue
        try:
            download(url)
        except KeyboardInterrupt:
            print("\nDownload process interrupted by user.")
            sys.exit(0)
        except Exception as e:
            # Top-level boundary: keep going with the next URL.
            print(f"An unexpected error occurred for {url}: {e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment