Last active
March 21, 2025 10:24
-
-
Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Fetch .lrc files for all songs in a directory from the Beautiful Lyrics API; supports the A2 extension (Enhanced LRC format).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import json | |
import re | |
from bs4 import BeautifulSoup | |
from mutagen import File as MutagenFile | |
from mutagen.mp4 import MP4 | |
# --- Configuration for A2 extension and gap text --- | |
def load_config():
    """
    Read the saved preferences from 'BLconfig.txt' in the current directory.

    Returns:
        A ``(useA2, gapText)`` tuple taken from the JSON object stored in the
        file. Either element is None when its key is absent. ``(None, None)``
        is returned when the file does not exist, cannot be read, is not valid
        JSON, or does not contain a JSON object, so the caller can fall back
        to prompting the user instead of crashing on a damaged file.
    """
    if not os.path.exists('BLconfig.txt'):
        return None, None
    try:
        with open('BLconfig.txt', 'r', encoding='utf-8') as config_file:
            config = json.load(config_file)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None, None
    if not isinstance(config, dict):
        # Valid JSON but not an object (e.g. a list): nothing usable inside.
        return None, None
    return config.get('useA2'), config.get('gapText')
def save_config(useA2, gapText):
    """
    Write the two preferences to 'BLconfig.txt' as a JSON object.

    The object has the keys 'useA2' and 'gapText'. Non-ASCII characters
    (such as a music-note gap text) are stored verbatim in UTF-8 rather than
    as escape sequences.
    """
    settings = {'useA2': useA2, 'gapText': gapText}
    with open('BLconfig.txt', 'w', encoding='utf-8') as config_file:
        serialized = json.dumps(settings, ensure_ascii=False)
        config_file.write(serialized)
# Start from the persisted preferences; ask the user (and persist the answers)
# only when at least one of them is missing.
useA2, gapText = load_config()

if not (useA2 is not None and gapText is not None):
    _a2_answer = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ")
    useA2 = _a2_answer.strip().lower() == 'yes'

    _gap_answer = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ')
    gapText = _gap_answer.strip()
    # "MusicNote" (any casing, spaces ignored) is shorthand for the ♪ glyph.
    if gapText.lower().replace(' ', '') == "musicnote":
        gapText = "♪"

    save_config(useA2, gapText)

# Asked on every run; this answer is not written to the config file.
_override_answer = input("Do you want to override existing files? (yes/no): ")
override_existing = _override_answer.strip().lower() == 'yes'
# Lower-case file extensions (including the leading dot) of the audio files
# that main() will try to fetch lyrics for.
supported_extensions = [
    ".asf",
    ".wma",
    ".flac",
    ".mp4",
    ".m4a",
    ".ape",
    ".mp3",
    ".mpc",
    ".opus",
    ".oga",
    ".spx",
    ".ogv",
    ".ogg",
    ".tta",
    ".wv",
    ".ofr",
    ".aiff",
    ".aif",
]
def _first_text(value):
    """Return the first entry of a tag value as a stripped string, or None if empty."""
    if isinstance(value, (list, tuple)):
        value = value[0] if value else None
    if value is None:
        return None
    # Some tag containers hand back wrapper objects rather than plain str;
    # str() makes .strip() safe for those.
    # NOTE(review): assumes str(value) yields the tag text - confirm per format.
    text = str(value).strip()
    return text or None


def get_metadata(filepath):
    """
    Extract the artist and title from an audio file's tags.

    MP4 containers ('.m4a' and '.mp4') are read through their MP4 atom keys.
    Every other file is opened with mutagen's generic loader and probed for
    the ID3 frames 'TPE1'/'TIT2', then for 'artist'/'title' keys, and finally
    for any tag key containing "artist" / "title".

    Args:
        filepath: Path of the audio file to inspect.

    Returns:
        An ``(artist, title)`` tuple of stripped strings. A missing or empty
        value is returned as None. ``(None, None)`` is returned when the file
        has no readable tags or any error occurs while reading it (the error
        is printed, not raised).
    """
    try:
        # '.mp4' is handled like '.m4a': both carry MP4 atoms, whose keys
        # ('\xa9ART', '\xa9nam') match none of the generic lookups below.
        if filepath.lower().endswith(('.m4a', '.mp4')):
            audio = MP4(filepath)
            tags = audio.tags
            if not tags:
                print(f"Could not read metadata from {filepath}")
                return None, None
            artist = _first_text(tags.get('\xa9ART'))
            title = _first_text(tags.get('\xa9nam'))
        else:
            audio = MutagenFile(filepath)
            if audio is None or not audio.tags:
                print(f"Could not read metadata from {filepath}")
                return None, None
            tags = audio.tags
            artist = None
            title = None

            # ID3 frames first, then plain 'artist' / 'title' keys.
            if 'TPE1' in tags:
                artist = _first_text(tags['TPE1'].text)
            elif 'artist' in tags:
                artist = _first_text(tags['artist'])

            if 'TIT2' in tags:
                title = _first_text(tags['TIT2'].text)
            elif 'title' in tags:
                title = _first_text(tags['title'])

            # Fallback: accept the first tag whose key mentions the field.
            if not artist or not title:
                for key, value in tags.items():
                    key_lower = key.lower()
                    if not artist and "artist" in key_lower:
                        artist = _first_text(value)
                    if not title and "title" in key_lower:
                        title = _first_text(value)

        return artist, title
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole run.
        print(f"Error reading metadata from {filepath}: {e}")
        return None, None
def get_bearer_token():
    """
    Obtain an access token by scraping the Spotify web player landing page.

    The page at https://open.spotify.com is downloaded, the element with
    id "session" is located, its text is parsed as JSON and the
    'accessToken' entry is returned.

    Returns:
        The access token string.

    Raises:
        requests.RequestException: On network errors, timeouts or a non-2xx
            response.
        RuntimeError: If the page contains no element with id "session".
        ValueError / KeyError: If the session payload is not valid JSON or
            has no 'accessToken' entry.
    """
    # requests waits indefinitely unless a timeout is supplied.
    response = requests.get("https://open.spotify.com", timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the next line.
        raise RuntimeError(
            "Could not find the session data on https://open.spotify.com; "
            "the page layout may have changed."
        )

    tokens = json.loads(session_element.get_text())
    return tokens['accessToken']
def search_spotify(artist, song, token):
    """
    Look up a track on the Spotify search API and return its track ID.

    Args:
        artist: Artist name used for the ``artist:`` search filter.
        song: Track title used for the ``track:`` search filter.
        token: Bearer token sent in the Authorization header.

    Returns:
        The ID of the first search hit, extracted from its 'href'.

    Raises:
        ValueError: If the search returns no tracks, or the first hit's href
            does not contain a track ID.
        Exception: If the API answers with a status code other than 200.
        requests.RequestException: On network errors or timeouts.
    """
    # Pass the query through `params` so requests percent-encodes it. Names
    # containing '&', '#', '+' etc. would otherwise corrupt the query string.
    response = requests.get(
        'https://api.spotify.com/v1/search',
        params={'q': f'artist:{artist} track:{song}', 'type': 'track'},
        headers={'Authorization': f'Bearer {token}'},
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")

    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")

    # Only the first (best-ranked) hit is considered.
    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)
def fetch_lyrics(track_id, token):
    """
    Download the lyrics document for a track from the Beautiful Lyrics API.

    Args:
        track_id: Track ID appended to the lyrics endpoint URL.
        token: Bearer token sent in the Authorization header.

    Returns:
        The decoded JSON body, or None when the status code is not 200 or
        the response body is empty.

    Raises:
        requests.RequestException: On network errors or timeouts.
        ValueError: If a non-empty body is not valid JSON.
    """
    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    headers = {
        'Authorization': f'Bearer {token}'
    }
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return None
    # An empty body means "no lyrics". Check the body itself as well as the
    # header, since an empty response may arrive without 'Content-Length: 0'.
    if response.headers.get('content-length') == '0' or not response.content:
        return None
    return response.json()
def convert_to_lrc_timestamp(timestamp):
    """
    Format a time in seconds as an LRC ``MM:SS.xx`` timestamp.

    Args:
        timestamp: Non-negative time offset in seconds (int or float).

    Returns:
        The timestamp with zero-padded minutes and seconds rounded to
        hundredths, e.g. ``75.5`` -> ``"01:15.50"``.
    """
    minutes = int(timestamp // 60)
    # Round before formatting: 59.999 would otherwise print as "60.00" and
    # give the invalid timestamp "00:60.00".
    seconds = round(timestamp % 60, 2)
    if seconds >= 60:
        minutes += 1
        seconds -= 60
    return f"{minutes:02}:{seconds:05.2f}"
def _join_plain_syllables(syllables):
    """Concatenate syllable texts, adding a space after each syllable that ends a word."""
    return ''.join(
        f"{syllable['Text']}{'' if syllable['IsPartOfWord'] else ' '}"
        for syllable in syllables
    )


def _join_enhanced_syllables(syllables, parenthesize=False):
    """Build an A2 line body: each word is preceded by a ``<MM:SS.xx>`` start tag."""
    pieces = []
    continues_word = False  # True while the previous syllable left its word unfinished
    last_index = len(syllables) - 1
    for index, syllable in enumerate(syllables):
        text = syllable['Text']
        if parenthesize:
            # Two independent checks so a one-syllable line gets both brackets.
            if index == 0:
                text = f"({text}"
            if index == last_index:
                text = f"{text})"
        if continues_word:
            # Same word as the previous syllable: glue on, no new time tag.
            pieces.append(text)
        else:
            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
            pieces.append(f" <{stamp}> {text}")
        continues_word = syllable['IsPartOfWord']
    return ''.join(pieces).strip()


def parse_lyrics(data, useA2, gapText):
    """
    Convert a lyrics document into a list of output lines.

    Args:
        data: Lyrics document. ``data['Type']`` selects the layout:
            'Line' (one timestamp per line), 'Syllable' (per-syllable timing,
            with optional 'Background' vocals) or 'Static' (untimed text).
        useA2: When true, 'Syllable' lyrics are written with per-word
            ``<MM:SS.xx>`` tags (Enhanced LRC). 'Line' lyrics cannot use them
            and fall back to standard LRC with a printed notice.
        gapText: Text placed on the marker line emitted whenever more than
            five seconds pass between the end of one line and the start of
            the next. An empty string emits a bare timestamp.

    Returns:
        A list of strings, one per output line. Background vocals are wrapped
        in parentheses. 'Static' lyrics are returned as plain text lines, and
        an unrecognized type yields an empty list.
    """
    GAP_THRESHOLD_SECONDS = 5
    lyrics = []
    prev_end_time = 0  # end time of the most recently emitted line

    def emit(start_time, end_time, body):
        """Append a gap marker if needed, then the timestamped line itself."""
        nonlocal prev_end_time
        if start_time - prev_end_time > GAP_THRESHOLD_SECONDS:
            # The marker is stamped where the previous line ended.
            gap_stamp = f"[{convert_to_lrc_timestamp(prev_end_time)}]"
            lyrics.append(gap_stamp if gapText == '' else f"{gap_stamp} {gapText}")
        lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}]{body}")
        prev_end_time = end_time

    lyrics_type = data['Type']

    if lyrics_type == 'Line':
        if useA2:
            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            emit(item['StartTime'], item['EndTime'], f" {item['Text'].strip()}")
            if 'Background' in item:
                print("This song has Background with Type Line, please report this song for further support.")

    elif lyrics_type == 'Syllable':
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue

            lead = item['Lead']
            if useA2:
                lead_body = _join_enhanced_syllables(lead['Syllables'])
            else:
                lead_body = f" {_join_plain_syllables(lead['Syllables']).strip()}"
            emit(lead['StartTime'], lead['EndTime'], lead_body)

            # Background vocals follow their lead line, each on its own line.
            for bg in item.get('Background') or []:
                if useA2:
                    bg_body = _join_enhanced_syllables(bg['Syllables'], parenthesize=True)
                else:
                    bg_body = f" ({_join_plain_syllables(bg['Syllables']).rstrip()})"
                emit(bg['StartTime'], bg['EndTime'], bg_body)

    elif lyrics_type == 'Static':
        print("The following song is not compatible with LRC, continuing with static Lyrics.")
        lyrics.extend(item['Text'] for item in data['Lines'])

    return lyrics
def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """
    Write the lyric lines to a file and print a confirmation.

    Args:
        lrc_filename: Path of the file to create or overwrite (UTF-8).
        lyrics_body: Iterable of lines; they are joined with newlines and no
            trailing newline is added.
        is_time_synced: Selects which confirmation message is printed.
        filename: Audio file name; its extension-less form appears in the
            printed message.
    """
    text = "\n".join(lyrics_body)
    with open(lrc_filename, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write(text)

    # Both messages name the song by its file name without the extension.
    base, _extension = os.path.splitext(filename)
    if not is_time_synced:
        print(f"Saved non-time-synced lyrics for '{base}'")
    else:
        print(f"Saved time-synced lyrics for '{base}'")
def main():
    """
    Fetch and save lyrics for every supported audio file in the current directory.

    For each file whose name ends in one of `supported_extensions`, the
    artist and title are read from its tags, the track is looked up on
    Spotify, its lyrics are downloaded and written next to the audio file
    with an '.lrc' extension. Files without usable metadata are skipped, as
    are files that already have an '.lrc' file unless `override_existing` is
    set. A failure on one file is printed and does not stop the run.
    """
    token = get_bearer_token()
    # str.endswith accepts a tuple, so one call checks every extension.
    extensions = tuple(supported_extensions)

    for item in os.listdir('.'):
        if not item.lower().endswith(extensions):
            continue

        artist, title = get_metadata(item)
        if not artist or not title:
            print(f"Could not extract metadata (artist/title) from '{item}', skipping.")
            continue

        base = os.path.splitext(item)[0]
        lrc_filename = base + '.lrc'
        if not override_existing and os.path.exists(lrc_filename):
            print(f"Lyrics for '{base}' already exist, skipping")
            continue

        try:
            track_id = search_spotify(artist, title, token)
            data = fetch_lyrics(track_id, token)
            lyrics = parse_lyrics(data, useA2, gapText) if data else []
            if lyrics:
                # 'Static' lyrics carry no timestamps; report them as such.
                is_time_synced = data['Type'] != 'Static'
                save_lyrics(lrc_filename, lyrics, is_time_synced, item)
            else:
                # Nothing to write: do not create (or overwrite with) an
                # empty .lrc file.
                print(f"No lyrics found for '{item}'")
        except Exception as e:
            # Top-level boundary: report and move on to the next file.
            print(f"Could not save lyrics for '{item}': {e}")
        print()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment