Skip to content

Instantly share code, notes, and snippets.

@yodaluca23
Last active March 21, 2025 10:24
Show Gist options
  • Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Fetch .lrc files for all songs in a directory from the Beautiful Lyrics API; supports the A2 extension (Enhanced LRC format).
import os
import requests
import json
import re
from bs4 import BeautifulSoup
from mutagen import File as MutagenFile
from mutagen.mp4 import MP4
# --- Configuration for A2 extension and gap text ---
def load_config():
    """Load the saved A2 / gap-text preferences from ``BLconfig.txt``.

    Returns:
        A ``(useA2, gapText)`` tuple. Either element is ``None`` when the
        key is absent. ``(None, None)`` is returned when the file does not
        exist, cannot be read, or does not contain a JSON object, so the
        caller can fall back to prompting the user instead of crashing.
    """
    try:
        with open('BLconfig.txt', 'r', encoding='utf-8') as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return None, None
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"Ignoring unreadable BLconfig.txt: {e}")
        return None, None
    if not isinstance(config, dict):
        print("Ignoring BLconfig.txt: expected a JSON object.")
        return None, None
    return config.get('useA2'), config.get('gapText')
def save_config(useA2, gapText):
    """Persist the A2 and gap-text preferences to ``BLconfig.txt`` as JSON.

    Args:
        useA2: Whether the A2 (Enhanced LRC) format should be used.
        gapText: Text shown for instrumental sections.
    """
    settings = {'useA2': useA2, 'gapText': gapText}
    with open('BLconfig.txt', 'w', encoding='utf-8') as handle:
        # ensure_ascii=False keeps characters such as the music note readable.
        handle.write(json.dumps(settings, ensure_ascii=False))
# Module-level setup: runs at import time and defines the globals
# ``useA2``, ``gapText`` and ``override_existing`` used by ``main``.
useA2, gapText = load_config()
# Prompt only when a preference is missing, then save the answers.
if useA2 is None or gapText is None:
    # Only the exact answer "yes" (case-insensitive, trimmed) enables A2.
    useA2 = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ").strip().lower() == 'yes'
    gapText = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ').strip()
    # "MusicNote" is matched ignoring case and spaces and replaced by the symbol.
    if gapText.lower().replace(' ', '') == "musicnote":
        gapText = "♪"
    save_config(useA2, gapText)
# Asked on every run; this answer is not written to the config file.
override_existing = input("Do you want to override existing files? (yes/no): ").strip().lower() == 'yes'
# List of supported file extensions
# Lower-case file suffixes (including the leading dot) that are scanned
# for metadata; matching is done against the lower-cased file name.
supported_extensions = [
    ".asf",
    ".wma",
    ".flac",
    ".mp4",
    ".m4a",
    ".ape",
    ".mp3",
    ".mpc",
    ".opus",
    ".oga",
    ".spx",
    ".ogv",
    ".ogg",
    ".tta",
    ".wv",
    ".ofr",
    ".aiff",
    ".aif",
]
def _first_text(value):
    """Return the first text entry of a tag value as a string, or None."""
    # Frame-style values keep their strings in ``.text``; others are used as-is.
    text = getattr(value, 'text', value)
    if isinstance(text, (list, tuple)):
        text = text[0] if text else None
    return None if text is None else str(text)


def get_metadata(filepath):
    """Extract artist and title metadata from an audio file.

    MP4 containers (``.m4a`` and ``.mp4``) are read through their
    MP4-specific tag keys; every other format goes through the generic
    mutagen loader and a series of common tag-key lookups.

    Args:
        filepath: Path of the audio file to inspect.

    Returns:
        An ``(artist, title)`` tuple of stripped strings. A value is
        ``None`` when it could not be found, and ``(None, None)`` is
        returned when the file cannot be read at all. Errors are printed,
        never raised.
    """
    try:
        # Both suffixes are MP4 containers, and both are listed as supported.
        if filepath.lower().endswith(('.m4a', '.mp4')):
            tags = MP4(filepath).tags
            if not tags:
                print(f"Could not read metadata from {filepath}")
                return None, None
            # The MP4 tags for artist and title are stored under these keys.
            artist = _first_text(tags.get('\xa9ART'))
            title = _first_text(tags.get('\xa9nam'))
        else:
            audio = MutagenFile(filepath)
            if audio is None or not audio.tags:
                print(f"Could not read metadata from {filepath}")
                return None, None
            tags = audio.tags
            artist = None
            title = None
            # Try common tag keys for MP3 and similar files.
            if 'TPE1' in tags:
                artist = _first_text(tags['TPE1'])
            elif 'artist' in tags:
                artist = _first_text(tags['artist'])
            if 'TIT2' in tags:
                title = _first_text(tags['TIT2'])
            elif 'title' in tags:
                title = _first_text(tags['title'])
            # Fallback: take the first tag whose key mentions the field.
            if not artist or not title:
                for key, value in tags.items():
                    key_lower = key.lower()
                    if not artist and "artist" in key_lower:
                        artist = _first_text(value)
                    if not title and "title" in key_lower:
                        title = _first_text(value)
        return (artist.strip() if artist else None), (title.strip() if title else None)
    except Exception as e:
        # Best effort: an unreadable file is reported and skipped by the caller.
        print(f"Error reading metadata from {filepath}: {e}")
        return None, None
def get_bearer_token():
    """Fetch an access token from the open.spotify.com landing page.

    The page is downloaded, the element with id ``session`` is located,
    and its text is parsed as JSON to read the ``accessToken`` field.

    Returns:
        The access token string.

    Raises:
        requests.RequestException: If the request fails, times out, or
            returns an error status.
        RuntimeError: If the page contains no ``session`` element.
        KeyError: If the session data has no ``accessToken`` entry.
    """
    fetch_url = "https://open.spotify.com"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(fetch_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        raise RuntimeError(
            f"No element with id 'session' found at {fetch_url}; cannot obtain an access token."
        )
    tokens = json.loads(session_element.get_text())
    return tokens['accessToken']
def search_spotify(artist, song, token):
    """Look up a track on the Spotify search API and return its track ID.

    Args:
        artist: Artist name used in the ``artist:`` search filter.
        song: Track title used in the ``track:`` search filter.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The ID of the first matching track, taken from its ``href``.

    Raises:
        ValueError: If no track matches or the ``href`` has no track ID.
        Exception: If the API answers with a status other than 200.
    """
    from urllib.parse import quote

    # Percent-encode the whole query so characters such as '&', '#' or '?'
    # in a name cannot break out of the ``q`` parameter.
    query = quote(f'artist:{artist} track:{song}', safe='')
    url = f'https://api.spotify.com/v1/search?q={query}&type=track'
    headers = {
        'Authorization': f'Bearer {token}'
    }
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")
    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")
    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)
def fetch_lyrics(track_id, token):
    """Request the lyrics for a track from the Beautiful Lyrics service.

    Args:
        track_id: Track ID appended to the lyrics URL.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The decoded JSON response, or ``None`` when the status is not 200
        or the response body is empty.
    """
    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    headers = {
        'Authorization': f'Bearer {token}'
    }
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return None
    # Check the body itself: an empty reply may arrive without a
    # content-length header, and decoding it as JSON would raise.
    if response.headers.get('content-length') == '0' or not response.content:
        return None
    return response.json()
def convert_to_lrc_timestamp(timestamp):
    """Format a time offset in seconds as an LRC ``mm:ss.xx`` timestamp.

    Args:
        timestamp: Non-negative offset in seconds (int or float).

    Returns:
        The timestamp with zero-padded minutes and seconds rounded to
        hundredths, e.g. ``75.5`` -> ``"01:15.50"``.
    """
    minutes = int(timestamp // 60)
    # Round before formatting: otherwise 59.999 would be printed as
    # "60.00" and yield an invalid seconds field instead of carrying over.
    seconds = round(timestamp % 60, 2)
    if seconds >= 60:
        minutes += 1
        seconds -= 60
    return f"{minutes:02}:{seconds:05.2f}"
def _join_syllables(syllables):
    """Join syllables into plain text, adding a space after each finished word."""
    return ''.join(
        f"{syllable['Text']}{'' if syllable['IsPartOfWord'] else ' '}"
        for syllable in syllables
    )


def _a2_words(syllables, parenthesize=False):
    """Build an A2 line body with a ``<mm:ss.xx>`` tag in front of every word."""
    pieces = []
    continues_word = False
    for index, syllable in enumerate(syllables):
        text = syllable['Text']
        if parenthesize and index == 0:
            text = f"({text}"
        if continues_word:
            # Still inside the previous word: append without tag or space.
            pieces.append(text)
        else:
            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
            pieces.append(f" <{stamp}> {text}")
        continues_word = syllable['IsPartOfWord']
    line = ''.join(pieces).strip()
    if parenthesize and syllables:
        line += ")"
    return line


def parse_lyrics(data, useA2, gapText):
    """Convert a lyrics payload into a list of LRC (or plain-text) lines.

    ``data['Type']`` selects the layout:

    * ``'Line'``: one ``[mm:ss.xx] text`` line per vocal item.
    * ``'Syllable'``: per-word ``<mm:ss.xx>`` tags when ``useA2`` is true,
      otherwise syllables are joined into plain timed lines. Background
      vocals are emitted as separate lines wrapped in parentheses.
    * ``'Static'``: untimed text lines.

    Whenever more than five seconds pass between the end of one line and
    the start of the next (or before the first line), a gap line stamped
    with the previous end time is inserted, carrying ``gapText`` if it is
    not empty.

    Background lines in A2 mode follow the same word-grouping rule as
    lead lines and always close their parenthesis, including lines with
    a single syllable.

    Args:
        data: Decoded lyrics payload (a dict with a ``'Type'`` key).
        useA2: Whether to emit per-word A2 timestamps when available.
        gapText: Text for instrumental gaps; ``''`` emits a bare timestamp.

    Returns:
        The list of output lines; empty for an unrecognized ``Type``.
    """
    lyrics = []
    prev_end_time = 0  # End time of the most recently emitted line.

    def emit(start_time, end_time, body, separator=" "):
        """Append a gap marker if needed, then the timed line itself."""
        nonlocal prev_end_time
        if start_time - prev_end_time > 5:
            gap_stamp = convert_to_lrc_timestamp(prev_end_time)
            lyrics.append(f"[{gap_stamp}]" if gapText == '' else f"[{gap_stamp}] {gapText}")
        lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}]{separator}{body}")
        prev_end_time = end_time

    lyrics_type = data['Type']

    if lyrics_type == 'Line':
        if useA2:
            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            emit(item['StartTime'], item['EndTime'], item['Text'].strip())
            if 'Background' in item:
                print("This song has Background with Type Line, please report this song for further support.")

    elif lyrics_type == 'Syllable':
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            lead = item['Lead']
            if useA2:
                # A2 bodies start with their own "<...>" tag, so no separator.
                emit(lead['StartTime'], lead['EndTime'], _a2_words(lead['Syllables']), separator="")
            else:
                emit(lead['StartTime'], lead['EndTime'], _join_syllables(lead['Syllables']).strip())
            for bg in item.get('Background', ()):
                if useA2:
                    body = _a2_words(bg['Syllables'], parenthesize=True)
                    emit(bg['StartTime'], bg['EndTime'], body, separator="")
                else:
                    body = f"({_join_syllables(bg['Syllables']).rstrip()})"
                    emit(bg['StartTime'], bg['EndTime'], body)

    elif lyrics_type == 'Static':
        print("The following song is not compatible with LRC, continuing with static Lyrics.")
        lyrics.extend(item['Text'] for item in data['Lines'])

    return lyrics
def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """Write the lyric lines to ``lrc_filename`` and report what was saved.

    Args:
        lrc_filename: Destination path of the lyrics file.
        lyrics_body: Lines to write, joined with newlines.
        is_time_synced: Selects which confirmation message is printed.
        filename: Source audio file name; its stem appears in the message.
    """
    contents = "\n".join(lyrics_body)
    with open(lrc_filename, 'w', encoding='utf-8') as output:
        output.write(contents)
    base, _extension = os.path.splitext(filename)
    message = (
        f"Saved time-synced lyrics for \'{base}\'"
        if is_time_synced
        else f"Saved non-time-synced lyrics for \'{base}\'"
    )
    print(message)
def main():
    """Fetch and save lyrics for every supported audio file in the current directory.

    For each file with a supported extension, the artist and title are
    read from its tags, the track is looked up on Spotify, and the lyrics
    are fetched, converted and written next to the audio file as
    ``<name>.lrc``. Files without usable metadata are skipped, as are
    files that already have lyrics unless ``override_existing`` is set.
    A failure on one file is printed and does not stop the run.
    """
    token = get_bearer_token()
    extensions = tuple(supported_extensions)
    for item in os.listdir('.'):
        if not item.lower().endswith(extensions):
            continue
        artist, title = get_metadata(item)
        if not artist or not title:
            print(f"Could not extract metadata (artist/title) from '{item}', skipping.")
            continue
        base = os.path.splitext(item)[0]
        lrc_filename = base + '.lrc'
        if not override_existing and os.path.exists(lrc_filename):
            print(f"Lyrics for '{base}' already exist, skipping")
            continue
        try:
            track_id = search_spotify(artist, title, token)
            data = fetch_lyrics(track_id, token)
            if data:
                lyrics = parse_lyrics(data, useA2, gapText)
                # Static payloads carry no timestamps, so report them as such.
                is_time_synced = data.get('Type') != 'Static'
                save_lyrics(lrc_filename, lyrics, is_time_synced, item)
            else:
                print(f"No lyrics found for '{item}'")
        except Exception as e:
            # Top-level boundary: report the failure and move on to the next file.
            print(f"Could not save lyrics for '{item}': {e}")
        # Blank line separating the output of consecutive files.
        print()
# Start the batch fetch only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment