yodaluca23 · March 21, 2025 10:24
diff --git a/BeautifulLyricsSaveLyrics.py b/BeautifulLyricsSaveLyrics.py
 import os
 import requests
 import json
 import re
 from bs4 import BeautifulSoup
 from mutagen import File as MutagenFile
 from mutagen.mp4 import MP4

 # --- Configuration for A2 extension and gap text ---
 def load_config():
    """Read the saved preferences from ``BLconfig.txt`` in the working directory.

    Returns:
        tuple: ``(useA2, gapText)`` as stored in the JSON object; a key that
        is absent yields ``None`` in its slot. ``(None, None)`` when the file
        is missing, unreadable, not valid JSON, or not a JSON object, so the
        caller can fall back to asking the user instead of crashing.
    """
    try:
        with open('BLconfig.txt', 'r', encoding='utf-8') as config_file:
            config = json.load(config_file)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError/UnicodeDecodeError subclass it).
        return None, None
    if not isinstance(config, dict):
        return None, None
    return config.get('useA2'), config.get('gapText')

 def save_config(useA2, gapText):
    """Store both preferences in ``BLconfig.txt`` as a single JSON object.

    The file is rewritten from scratch in UTF-8, and non-ASCII characters
    (for example a music-note gap text) are kept as-is instead of being
    turned into ASCII escape sequences.
    """
    settings = {'useA2': useA2, 'gapText': gapText}
    with open('BLconfig.txt', 'w', encoding='utf-8') as handle:
        handle.write(json.dumps(settings, ensure_ascii=False))

 # Start from the stored preferences; load_config() yields None for any
 # value it could not provide.
 useA2, gapText = load_config()
 # First run (or incomplete config): ask for both settings and persist them.
 if useA2 is None or gapText is None:
    # Only the exact answer "yes" (any case, surrounding blanks ignored) enables A2.
    useA2 = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ").strip().lower() == 'yes'
    gapText = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ').strip()
    # "MusicNote", compared ignoring case and spaces, stands for the note symbol.
    if gapText.lower().replace(' ', '') == "musicnote":
        gapText = "♪"
    save_config(useA2, gapText)

 # Asked on every run; this answer is not written to the config file.
 override_existing = input("Do you want to override existing files? (yes/no): ").strip().lower() == 'yes'

 # Audio file extensions (lower-case, leading dot included) that main()
 # matches directory entries against.
 supported_extensions = [
    ".asf",
    ".wma",
    ".flac",
    ".mp4",
    ".m4a",
    ".ape",
    ".mp3",
    ".mpc",
    ".opus",
    ".oga",
    ".spx",
    ".ogv",
    ".ogg",
    ".tta",
    ".wv",
    ".ofr",
    ".aiff",
    ".aif",
 ]

 def _first_tag_text(value):
    """Return the first text value of a tag entry as a stripped ``str``.

    Tag entries come in several shapes: a plain list of strings, a frame
    object that exposes its strings through ``.text``, or a wrapper object
    that only becomes text through ``str()``. Returns ``None`` when there is
    no usable, non-empty text.
    """
    frame_text = getattr(value, 'text', None)
    if isinstance(frame_text, (list, tuple)):
        value = frame_text
    if isinstance(value, (list, tuple)):
        if not value:
            return None
        value = value[0]
    if value is None:
        return None
    return str(value).strip() or None


 def _lookup_tag(tags, keys):
    """Return the text of the first key in ``keys`` that holds a non-empty value."""
    for key in keys:
        if key in tags:
            text = _first_tag_text(tags[key])
            if text:
                return text
    return None


 def get_metadata(filepath):
    """
    Extract the artist and title of an audio file from its tags.

    ``.m4a`` and ``.mp4`` files are read through the MP4 atom keys. Every
    other format goes through mutagen's generic loader and is probed for the
    usual artist/title keys (ID3, lower-case generic names, ASF), then for
    any key whose name contains "artist" / "title".

    Args:
        filepath: Path of the audio file.

    Returns:
        ``(artist, title)`` as stripped strings; an element is ``None`` when
        it could not be found. ``(None, None)`` when the file has no tags or
        reading it failed (the error is printed, never raised).
    """
    try:
        if filepath.lower().endswith(('.m4a', '.mp4')):
            # MP4 containers keep artist/title under the ©ART / ©nam atoms,
            # which the generic key probing below would never match.
            tags = MP4(filepath).tags
            if not tags:
                print(f"Could not read metadata from {filepath}")
                return None, None
            return (_first_tag_text(tags.get('\xa9ART')),
                    _first_tag_text(tags.get('\xa9nam')))

        audio = MutagenFile(filepath)
        if audio is None or not audio.tags:
            print(f"Could not read metadata from {filepath}")
            return None, None
        tags = audio.tags

        # Well-known keys first: ID3 frame IDs, generic lower-case names,
        # then the ASF/WMA attribute names.
        artist = _lookup_tag(tags, ('TPE1', 'artist', 'Author'))
        title = _lookup_tag(tags, ('TIT2', 'title', 'Title'))

        # Fallback: accept any key that merely mentions artist/title.
        if not artist or not title:
            for key, value in tags.items():
                key_lower = key.lower()
                if not artist and "artist" in key_lower:
                    artist = _first_tag_text(value)
                if not title and "title" in key_lower:
                    title = _first_tag_text(value)

        return artist, title
    except Exception as e:
        # Best effort: an unreadable file is reported and skipped by the caller.
        print(f"Error reading metadata from {filepath}: {e}")
        return None, None

 def get_bearer_token():
    """Scrape an access token from the Spotify web player page.

    Fetches ``https://open.spotify.com``, locates the element with
    ``id="session"``, parses its text as JSON and returns the
    ``accessToken`` entry.

    Returns:
        The access token string.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        RuntimeError: If the page has no ``session`` element, or its content
            is not JSON carrying an ``accessToken``.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get("https://open.spotify.com", timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        raise RuntimeError(
            "Spotify page has no element with id 'session'; cannot obtain an access token."
        )
    try:
        return json.loads(session_element.get_text())['accessToken']
    except (ValueError, KeyError, TypeError) as err:
        raise RuntimeError(
            "Spotify session data did not contain a usable access token."
        ) from err

 def search_spotify(artist, song, token):
    """Return the Spotify track ID of the first search hit for an artist/title pair.

    Args:
        artist: Artist name used for the ``artist:`` search filter.
        song: Track title used for the ``track:`` search filter.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The track ID extracted from the first result's ``href``.

    Raises:
        ValueError: If the search returns no tracks, or no ID can be found
            in the first result's ``href``.
        Exception: If the API answers with a status other than 200.
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        'https://api.spotify.com/v1/search',
        # Passing the query via ``params`` lets requests percent-encode it,
        # so names containing "&", "#", "?" or "%" cannot corrupt the URL.
        params={'q': f'artist:{artist} track:{song}', 'type': 'track'},
        headers={'Authorization': f'Bearer {token}'},
        timeout=15,
    )
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")

    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")

    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)

 def fetch_lyrics(track_id, token):
    """Download the lyrics document for a track from the Beautiful Lyrics service.

    Args:
        track_id: Track ID appended to the lyrics endpoint URL.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The decoded JSON body, or ``None`` when the status is not 200 or the
        body is empty.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If a non-empty 200 response is not valid JSON.
    """
    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    headers = {
        'Authorization': f'Bearer {token}'
    }
    response = requests.get(url, headers=headers, timeout=15)
    # Inspect the body itself rather than the Content-Length header, which
    # may be missing (e.g. chunked transfer) even though the body is empty.
    if response.status_code != 200 or not response.content:
        return None
    return response.json()

 def convert_to_lrc_timestamp(timestamp):
    """Format a time in seconds as an LRC ``mm:ss.xx`` timestamp.

    The value is rounded to whole centiseconds *before* it is split into
    minutes and seconds, so 59.999 becomes ``01:00.00`` rather than the
    invalid ``00:60.00``.

    Args:
        timestamp: Non-negative time in seconds (int or float).

    Returns:
        The timestamp text without the surrounding brackets.
    """
    total_centiseconds = round(timestamp * 100)
    minutes, remainder = divmod(total_centiseconds, 6000)
    seconds, hundredths = divmod(remainder, 100)
    return f"{minutes:02}:{seconds:02}.{hundredths:02}"

 def _join_syllables_plain(syllables):
    """Concatenate syllable texts, adding a space after each syllable that ends a word."""
    return ''.join(
        f"{syllable['Text']}{'' if syllable['IsPartOfWord'] else ' '}"
        for syllable in syllables
    )


 def _join_syllables_a2(syllables, opening='', closing=''):
    """Render syllables as A2 (Enhanced LRC) text.

    A syllable that starts a new word is preceded by its ``<mm:ss.xx>`` tag;
    a syllable that continues the previous word (the previous syllable had
    ``IsPartOfWord`` set) is appended directly. ``opening`` is put in front
    of the first syllable's text and ``closing`` behind the last one's.
    """
    pieces = []
    continues_word = False
    last_index = len(syllables) - 1
    for index, syllable in enumerate(syllables):
        text = syllable['Text']
        if index == 0:
            text = opening + text
        if index == last_index:
            text += closing
        if continues_word:
            pieces.append(text)
        else:
            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
            pieces.append(f" <{stamp}> {text}")
        continues_word = syllable['IsPartOfWord']
    return ''.join(pieces).strip()


 def parse_lyrics(data, useA2, gapText):
    """Turn a lyrics document into a list of LRC (or plain-text) lines.

    Args:
        data: Decoded lyrics document. ``data['Type']`` selects the layout:
            ``'Line'`` (timed lines in ``Content``), ``'Syllable'`` (timed
            syllables under ``Lead`` / ``Background`` in ``Content``) or
            ``'Static'`` (untimed entries in ``Lines``).
        useA2: When true, syllable-timed lyrics get per-word ``<mm:ss.xx>``
            tags; line-timed lyrics cannot use them and a notice is printed.
        gapText: Text of the marker line inserted when more than five
            seconds pass between the end of one line and the start of the
            next; an empty string produces a bare timestamp.

    Returns:
        The output lines in document order. Background vocals are wrapped in
        parentheses. The list is empty for an unrecognised ``Type``.
    """
    lyrics = []
    prev_end_time = 0  # End of the most recent line; the track start at first.

    def add_gap_marker(start_time):
        # Reads the enclosing prev_end_time at call time; the marker is
        # stamped at the moment the previous line ended.
        if start_time - prev_end_time > 5:
            stamp = f"[{convert_to_lrc_timestamp(prev_end_time)}]"
            lyrics.append(stamp if gapText == '' else f"{stamp} {gapText}")

    kind = data['Type']
    if kind == 'Line':
        if useA2:
            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
        for item in data['Content']:
            if item['Type'] == 'Vocal':
                start_time = item['StartTime']
                add_gap_marker(start_time)
                lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}] {item['Text'].strip()}")
                prev_end_time = item['EndTime']
            if 'Background' in item:
                print("This song has Background with Type Line, please report this song for further support.")
    elif kind == 'Syllable':
        for item in data['Content']:
            if item['Type'] == 'Vocal':
                lead = item['Lead']
                add_gap_marker(lead['StartTime'])
                stamp = convert_to_lrc_timestamp(lead['StartTime'])
                if useA2:
                    lyrics.append(f"[{stamp}]{_join_syllables_a2(lead['Syllables'])}")
                else:
                    lyrics.append(f"[{stamp}] {_join_syllables_plain(lead['Syllables']).strip()}")
                prev_end_time = lead['EndTime']
            if 'Background' in item:
                for bg in item['Background']:
                    add_gap_marker(bg['StartTime'])
                    stamp = convert_to_lrc_timestamp(bg['StartTime'])
                    if useA2:
                        # Same word-joining rule as the lead line, so multi-
                        # syllable words keep their tags and a one-syllable
                        # line still gets both parentheses.
                        body = _join_syllables_a2(bg['Syllables'], '(', ')')
                        lyrics.append(f"[{stamp}]{body}")
                    else:
                        body = _join_syllables_plain(bg['Syllables']).rstrip()
                        lyrics.append(f"[{stamp}] ({body})")
                    prev_end_time = bg['EndTime']
    elif kind == 'Static':
        print("The following song is not compatible with LRC, continuing with static Lyrics.")
        lyrics.extend(item['Text'] for item in data['Lines'])
    return lyrics

 def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """Write the lyric lines to ``lrc_filename`` and print a confirmation.

    Args:
        lrc_filename: Path of the file to (over)write, UTF-8 encoded.
        lyrics_body: Lines to write, joined with newlines and without a
            trailing newline.
        is_time_synced: Chooses between the "time-synced" and
            "non-time-synced" wording of the confirmation.
        filename: Audio file name; it is printed without its extension.
    """
    with open(lrc_filename, 'w', encoding='utf-8') as out:
        text = "\n".join(lyrics_body)
        out.write(text)

    stem, _extension = os.path.splitext(filename)
    if not is_time_synced:
        message = f"Saved non-time-synced lyrics for '{stem}'"
    else:
        message = f"Saved time-synced lyrics for '{stem}'"
    print(message)

 def _process_audio_file(item, token):
    """Fetch, convert and save the lyrics of one audio file, printing the outcome."""
    artist, title = get_metadata(item)
    if not artist or not title:
        print(f"Could not extract metadata (artist/title) from '{item}', skipping.")
        return

    base = os.path.splitext(item)[0]
    lrc_filename = base + '.lrc'
    if not override_existing and os.path.exists(lrc_filename):
        print(f"Lyrics for '{base}' already exist, skipping")
        return

    try:
        track_id = search_spotify(artist, title, token)
        data = fetch_lyrics(track_id, token)
        lyrics = parse_lyrics(data, useA2, gapText) if data else []
        if not lyrics:
            # Nothing came back, or the document produced no lines: do not
            # leave an empty .lrc file behind.
            print(f"No lyrics found for '{item}'")
            return
        # 'Static' documents are saved without timestamps, so report them
        # as non-time-synced.
        save_lyrics(lrc_filename, lyrics, data['Type'] != 'Static', item)
    except Exception as e:
        # Best effort: one failing track must not abort the whole run.
        print(f"Could not save lyrics for '{item}': {e}")


 def main():
    """Save lyrics next to every supported audio file in the working directory.

    Obtains one bearer token, then handles each directory entry whose name
    ends with one of ``supported_extensions`` (case-insensitive). The
    lyrics go to a file with the same base name and an ``.lrc`` extension.
    A blank line is printed after each handled audio file to separate the
    messages; other directory entries produce no output.
    """
    token = get_bearer_token()
    audio_suffixes = tuple(supported_extensions)
    for item in os.listdir('.'):
        if not item.lower().endswith(audio_suffixes):
            continue
        _process_audio_file(item, token)
        print()

 if __name__ == "__main__":
    main()
	import os
	import requests
	import json
	import re
	from bs4 import BeautifulSoup
	from mutagen import File as MutagenFile
	from mutagen.mp4 import MP4

	# --- Configuration for A2 extension and gap text ---
	def load_config():
	    """Read the saved preferences from ``BLconfig.txt`` in the working directory.

	    Returns:
	        tuple: ``(useA2, gapText)`` as stored in the JSON object; a key that
	        is absent yields ``None`` in its slot. ``(None, None)`` when the file
	        is missing, unreadable, not valid JSON, or not a JSON object, so the
	        caller can fall back to asking the user instead of crashing.
	    """
	    try:
	        with open('BLconfig.txt', 'r', encoding='utf-8') as config_file:
	            config = json.load(config_file)
	    except (OSError, ValueError):
	        # OSError: file missing or unreadable. ValueError: malformed JSON or
	        # undecodable bytes (JSONDecodeError/UnicodeDecodeError subclass it).
	        return None, None
	    if not isinstance(config, dict):
	        return None, None
	    return config.get('useA2'), config.get('gapText')

	def save_config(useA2, gapText):
	    """Store both preferences in ``BLconfig.txt`` as a single JSON object.

	    The file is rewritten from scratch in UTF-8, and non-ASCII characters
	    (for example a music-note gap text) are kept as-is instead of being
	    turned into ASCII escape sequences.
	    """
	    with open('BLconfig.txt', 'w', encoding='utf-8') as config_file:
	        json.dump({'useA2': useA2, 'gapText': gapText}, config_file, ensure_ascii=False)

	# Start from the stored preferences; load_config() yields None for any
	# value it could not provide.
	useA2, gapText = load_config()
	# First run (or incomplete config): ask for both settings and persist them.
	if useA2 is None or gapText is None:
	    # Only the exact answer "yes" (any case, surrounding blanks ignored) enables A2.
	    useA2 = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ").strip().lower() == 'yes'
	    gapText = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ').strip()
	    # "MusicNote", compared ignoring case and spaces, stands for the note symbol.
	    if gapText.lower().replace(' ', '') == "musicnote":
	        gapText = "♪"
	    save_config(useA2, gapText)

	# Asked on every run; this answer is not written to the config file.
	override_existing = input("Do you want to override existing files? (yes/no): ").strip().lower() == 'yes'

	# Audio file extensions (lower-case, leading dot included) that main()
	# matches directory entries against.
	supported_extensions = [
	    ".asf",
	    ".wma",
	    ".flac",
	    ".mp4",
	    ".m4a",
	    ".ape",
	    ".mp3",
	    ".mpc",
	    ".opus",
	    ".oga",
	    ".spx",
	    ".ogv",
	    ".ogg",
	    ".tta",
	    ".wv",
	    ".ofr",
	    ".aiff",
	    ".aif",
	]

	def _first_tag_text(value):
	    """Return the first text value of a tag entry as a stripped ``str``.

	    Tag entries come in several shapes: a plain list of strings, a frame
	    object that exposes its strings through ``.text``, or a wrapper object
	    that only becomes text through ``str()``. Returns ``None`` when there is
	    no usable, non-empty text.
	    """
	    frame_text = getattr(value, 'text', None)
	    if isinstance(frame_text, (list, tuple)):
	        value = frame_text
	    if isinstance(value, (list, tuple)):
	        if not value:
	            return None
	        value = value[0]
	    if value is None:
	        return None
	    return str(value).strip() or None


	def _lookup_tag(tags, keys):
	    """Return the text of the first key in ``keys`` that holds a non-empty value."""
	    for key in keys:
	        if key in tags:
	            text = _first_tag_text(tags[key])
	            if text:
	                return text
	    return None


	def get_metadata(filepath):
	    """
	    Extract the artist and title of an audio file from its tags.

	    ``.m4a`` and ``.mp4`` files are read through the MP4 atom keys. Every
	    other format goes through mutagen's generic loader and is probed for the
	    usual artist/title keys (ID3, lower-case generic names, ASF), then for
	    any key whose name contains "artist" / "title".

	    Args:
	        filepath: Path of the audio file.

	    Returns:
	        ``(artist, title)`` as stripped strings; an element is ``None`` when
	        it could not be found. ``(None, None)`` when the file has no tags or
	        reading it failed (the error is printed, never raised).
	    """
	    try:
	        if filepath.lower().endswith(('.m4a', '.mp4')):
	            # MP4 containers keep artist/title under the ©ART / ©nam atoms,
	            # which the generic key probing below would never match.
	            tags = MP4(filepath).tags
	            if not tags:
	                print(f"Could not read metadata from {filepath}")
	                return None, None
	            return (_first_tag_text(tags.get('\xa9ART')),
	                    _first_tag_text(tags.get('\xa9nam')))

	        audio = MutagenFile(filepath)
	        if audio is None or not audio.tags:
	            print(f"Could not read metadata from {filepath}")
	            return None, None
	        tags = audio.tags

	        # Well-known keys first: ID3 frame IDs, generic lower-case names,
	        # then the ASF/WMA attribute names.
	        artist = _lookup_tag(tags, ('TPE1', 'artist', 'Author'))
	        title = _lookup_tag(tags, ('TIT2', 'title', 'Title'))

	        # Fallback: accept any key that merely mentions artist/title.
	        if not artist or not title:
	            for key, value in tags.items():
	                key_lower = key.lower()
	                if not artist and "artist" in key_lower:
	                    artist = _first_tag_text(value)
	                if not title and "title" in key_lower:
	                    title = _first_tag_text(value)

	        return artist, title
	    except Exception as e:
	        # Best effort: an unreadable file is reported and skipped by the caller.
	        print(f"Error reading metadata from {filepath}: {e}")
	        return None, None

	def get_bearer_token():
	    """Scrape an access token from the Spotify web player page.

	    Fetches ``https://open.spotify.com``, locates the element with
	    ``id="session"``, parses its text as JSON and returns the
	    ``accessToken`` entry.

	    Returns:
	        The access token string.

	    Raises:
	        requests.RequestException: On network failure, timeout or an HTTP
	            error status.
	        RuntimeError: If the page has no ``session`` element, or its content
	            is not JSON carrying an ``accessToken``.
	    """
	    # A timeout keeps the script from hanging forever on a stalled connection.
	    response = requests.get("https://open.spotify.com", timeout=15)
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, 'html.parser')
	    session_element = soup.find(id="session")
	    if session_element is None:
	        raise RuntimeError(
	            "Spotify page has no element with id 'session'; cannot obtain an access token."
	        )
	    try:
	        return json.loads(session_element.get_text())['accessToken']
	    except (ValueError, KeyError, TypeError) as err:
	        raise RuntimeError(
	            "Spotify session data did not contain a usable access token."
	        ) from err

	def search_spotify(artist, song, token):
	    """Return the Spotify track ID of the first search hit for an artist/title pair.

	    Args:
	        artist: Artist name used for the ``artist:`` search filter.
	        song: Track title used for the ``track:`` search filter.
	        token: Bearer token sent in the ``Authorization`` header.

	    Returns:
	        The track ID extracted from the first result's ``href``.

	    Raises:
	        ValueError: If the search returns no tracks, or no ID can be found
	            in the first result's ``href``.
	        Exception: If the API answers with a status other than 200.
	        requests.RequestException: On network failure or timeout.
	    """
	    response = requests.get(
	        'https://api.spotify.com/v1/search',
	        # Passing the query via ``params`` lets requests percent-encode it,
	        # so names containing "&", "#", "?" or "%" cannot corrupt the URL.
	        params={'q': f'artist:{artist} track:{song}', 'type': 'track'},
	        headers={'Authorization': f'Bearer {token}'},
	        timeout=15,
	    )
	    if response.status_code != 200:
	        raise Exception(f"Spotify API request failed with status code {response.status_code}")

	    items = response.json()['tracks']['items']
	    if not items:
	        raise ValueError("No tracks found for the given artist and song.")

	    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
	    if not match:
	        raise ValueError("Song ID not found in the href.")
	    return match.group(1)

	def fetch_lyrics(track_id, token):
	    """Download the lyrics document for a track from the Beautiful Lyrics service.

	    Args:
	        track_id: Track ID appended to the lyrics endpoint URL.
	        token: Bearer token sent in the ``Authorization`` header.

	    Returns:
	        The decoded JSON body, or ``None`` when the status is not 200 or the
	        body is empty.

	    Raises:
	        requests.RequestException: On network failure or timeout.
	        ValueError: If a non-empty 200 response is not valid JSON.
	    """
	    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
	    headers = {
	        'Authorization': f'Bearer {token}'
	    }
	    response = requests.get(url, headers=headers, timeout=15)
	    # Inspect the body itself rather than the Content-Length header, which
	    # may be missing (e.g. chunked transfer) even though the body is empty.
	    if response.status_code != 200 or not response.content:
	        return None
	    return response.json()

	def convert_to_lrc_timestamp(timestamp):
	    """Format a time in seconds as an LRC ``mm:ss.xx`` timestamp.

	    The value is rounded to whole centiseconds *before* it is split into
	    minutes and seconds, so 59.999 becomes ``01:00.00`` rather than the
	    invalid ``00:60.00``.

	    Args:
	        timestamp: Non-negative time in seconds (int or float).

	    Returns:
	        The timestamp text without the surrounding brackets.
	    """
	    total_centiseconds = round(timestamp * 100)
	    minutes, remainder = divmod(total_centiseconds, 6000)
	    seconds, hundredths = divmod(remainder, 100)
	    return f"{minutes:02}:{seconds:02}.{hundredths:02}"

	def _join_syllables_plain(syllables):
	    """Concatenate syllable texts, adding a space after each syllable that ends a word."""
	    return ''.join(
	        f"{syllable['Text']}{'' if syllable['IsPartOfWord'] else ' '}"
	        for syllable in syllables
	    )


	def _join_syllables_a2(syllables, opening='', closing=''):
	    """Render syllables as A2 (Enhanced LRC) text.

	    A syllable that starts a new word is preceded by its ``<mm:ss.xx>`` tag;
	    a syllable that continues the previous word (the previous syllable had
	    ``IsPartOfWord`` set) is appended directly. ``opening`` is put in front
	    of the first syllable's text and ``closing`` behind the last one's.
	    """
	    pieces = []
	    continues_word = False
	    last_index = len(syllables) - 1
	    for index, syllable in enumerate(syllables):
	        text = syllable['Text']
	        if index == 0:
	            text = opening + text
	        if index == last_index:
	            text += closing
	        if continues_word:
	            pieces.append(text)
	        else:
	            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
	            pieces.append(f" <{stamp}> {text}")
	        continues_word = syllable['IsPartOfWord']
	    return ''.join(pieces).strip()


	def parse_lyrics(data, useA2, gapText):
	    """Turn a lyrics document into a list of LRC (or plain-text) lines.

	    Args:
	        data: Decoded lyrics document. ``data['Type']`` selects the layout:
	            ``'Line'`` (timed lines in ``Content``), ``'Syllable'`` (timed
	            syllables under ``Lead`` / ``Background`` in ``Content``) or
	            ``'Static'`` (untimed entries in ``Lines``).
	        useA2: When true, syllable-timed lyrics get per-word ``<mm:ss.xx>``
	            tags; line-timed lyrics cannot use them and a notice is printed.
	        gapText: Text of the marker line inserted when more than five
	            seconds pass between the end of one line and the start of the
	            next; an empty string produces a bare timestamp.

	    Returns:
	        The output lines in document order. Background vocals are wrapped in
	        parentheses. The list is empty for an unrecognised ``Type``.
	    """
	    lyrics = []
	    prev_end_time = 0  # End of the most recent line; the track start at first.

	    def add_gap_marker(start_time):
	        # Reads the enclosing prev_end_time at call time; the marker is
	        # stamped at the moment the previous line ended.
	        if start_time - prev_end_time > 5:
	            stamp = f"[{convert_to_lrc_timestamp(prev_end_time)}]"
	            lyrics.append(stamp if gapText == '' else f"{stamp} {gapText}")

	    kind = data['Type']
	    if kind == 'Line':
	        if useA2:
	            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
	        for item in data['Content']:
	            if item['Type'] == 'Vocal':
	                start_time = item['StartTime']
	                add_gap_marker(start_time)
	                lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}] {item['Text'].strip()}")
	                prev_end_time = item['EndTime']
	            if 'Background' in item:
	                print("This song has Background with Type Line, please report this song for further support.")
	    elif kind == 'Syllable':
	        for item in data['Content']:
	            if item['Type'] == 'Vocal':
	                lead = item['Lead']
	                add_gap_marker(lead['StartTime'])
	                stamp = convert_to_lrc_timestamp(lead['StartTime'])
	                if useA2:
	                    lyrics.append(f"[{stamp}]{_join_syllables_a2(lead['Syllables'])}")
	                else:
	                    lyrics.append(f"[{stamp}] {_join_syllables_plain(lead['Syllables']).strip()}")
	                prev_end_time = lead['EndTime']
	            if 'Background' in item:
	                for bg in item['Background']:
	                    add_gap_marker(bg['StartTime'])
	                    stamp = convert_to_lrc_timestamp(bg['StartTime'])
	                    if useA2:
	                        # Same word-joining rule as the lead line, so multi-
	                        # syllable words keep their tags and a one-syllable
	                        # line still gets both parentheses.
	                        body = _join_syllables_a2(bg['Syllables'], '(', ')')
	                        lyrics.append(f"[{stamp}]{body}")
	                    else:
	                        body = _join_syllables_plain(bg['Syllables']).rstrip()
	                        lyrics.append(f"[{stamp}] ({body})")
	                    prev_end_time = bg['EndTime']
	    elif kind == 'Static':
	        print("The following song is not compatible with LRC, continuing with static Lyrics.")
	        lyrics.extend(item['Text'] for item in data['Lines'])
	    return lyrics

	def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
	    """Write the lyric lines to ``lrc_filename`` and print a confirmation.

	    Args:
	        lrc_filename: Path of the file to (over)write, UTF-8 encoded.
	        lyrics_body: Lines to write, joined with newlines and without a
	            trailing newline.
	        is_time_synced: Chooses between the "time-synced" and
	            "non-time-synced" wording of the confirmation.
	        filename: Audio file name; it is printed without its extension.
	    """
	    with open(lrc_filename, 'w', encoding='utf-8') as lrc_file:
	        lrc_file.write("\n".join(lyrics_body))
	    base = os.path.splitext(filename)[0]
	    if is_time_synced:
	        print(f"Saved time-synced lyrics for '{base}'")
	    else:
	        print(f"Saved non-time-synced lyrics for '{base}'")

	def _process_audio_file(item, token):
	    """Fetch, convert and save the lyrics of one audio file, printing the outcome."""
	    artist, title = get_metadata(item)
	    if not artist or not title:
	        print(f"Could not extract metadata (artist/title) from '{item}', skipping.")
	        return

	    base = os.path.splitext(item)[0]
	    lrc_filename = base + '.lrc'
	    if not override_existing and os.path.exists(lrc_filename):
	        print(f"Lyrics for '{base}' already exist, skipping")
	        return

	    try:
	        track_id = search_spotify(artist, title, token)
	        data = fetch_lyrics(track_id, token)
	        lyrics = parse_lyrics(data, useA2, gapText) if data else []
	        if not lyrics:
	            # Nothing came back, or the document produced no lines: do not
	            # leave an empty .lrc file behind.
	            print(f"No lyrics found for '{item}'")
	            return
	        # 'Static' documents are saved without timestamps, so report them
	        # as non-time-synced.
	        save_lyrics(lrc_filename, lyrics, data['Type'] != 'Static', item)
	    except Exception as e:
	        # Best effort: one failing track must not abort the whole run.
	        print(f"Could not save lyrics for '{item}': {e}")


	def main():
	    """Save lyrics next to every supported audio file in the working directory.

	    Obtains one bearer token, then handles each directory entry whose name
	    ends with one of ``supported_extensions`` (case-insensitive). The
	    lyrics go to a file with the same base name and an ``.lrc`` extension.
	    A blank line is printed after each handled audio file to separate the
	    messages; other directory entries produce no output.
	    """
	    token = get_bearer_token()
	    audio_suffixes = tuple(supported_extensions)
	    for item in os.listdir('.'):
	        if not item.lower().endswith(audio_suffixes):
	            continue
	        _process_audio_file(item, token)
	        print()

	if __name__ == "__main__":
	    main()