densumesh · August 3, 2025 03:32
diff --git a/ytm-to-apple-music.py b/ytm-to-apple-music.py
 #!/usr/bin/env python3
 """
 Very small YouTube Music → Apple Music playlist migrator.

 pip install ytmusicapi tqdm requests python-dotenv pyjwt cryptography
 """

 import os, json, time, urllib.parse, requests
 from ytmusicapi import OAuthCredentials, YTMusic  # YouTube Music helper
 from tqdm import tqdm  # progress bars
 from dotenv import load_dotenv  # read .env with your secrets
 import jwt  # for Apple Music JWT token generation
 import re
 from difflib import SequenceMatcher

 # Load secrets from a local .env file. This section reads YT_CLIENT_ID,
 # YT_CLIENT_SECRET, AM_PRIVATE_KEY, AM_KEY_ID, AM_TEAM_ID and AM_USER_TOKEN,
 # plus the optional AM_STOREFRONT.
 load_dotenv()

 # ---- 1. log in to both services -------------------------------------------

 # os.getenv returns None for an unset variable, so a missing value is not
 # reported here; it only surfaces when the clients below try to use it.
 yt_client_id = os.getenv("YT_CLIENT_ID")
 yt_client_secret = os.getenv("YT_CLIENT_SECRET")
 am_private_key = os.getenv("AM_PRIVATE_KEY")
 am_key_id = os.getenv("AM_KEY_ID")
 am_team_id = os.getenv("AM_TEAM_ID")

 # YouTube Music client. "oauth.json" is passed as the first argument —
 # presumably the token file written by ytmusicapi's OAuth setup; confirm.
 yt = YTMusic(
    "oauth.json",
    oauth_credentials=OAuthCredentials(
        client_id=yt_client_id, client_secret=yt_client_secret
    ),
 )

 # Claims for the JWT sent to Apple Music as the Bearer token: issuer is the
 # team id, issued now, expiring 180 days from now.
 payload = {
    "iss": am_team_id,
    "iat": int(time.time()),
    "exp": int(time.time()) + 60 * 60 * 24 * 180,  # 180 days
 }
 # Signed with ES256 using the private key; the key id travels in the JWT
 # header as "kid".
 token = jwt.encode(
    payload, am_private_key, algorithm="ES256", headers={"kid": am_key_id}
 )

 AM_BASE = "https://api.music.apple.com"
 # One shared session so every Apple Music request carries the same headers.
 session = requests.Session()
 session.headers.update(
    {
        "Authorization": f"Bearer {token}",
        # os.environ[...] rather than getenv: an unset AM_USER_TOKEN raises
        # KeyError right here.
        "Music-User-Token": os.environ["AM_USER_TOKEN"],
        "Content-Type": "application/json",
    }
 )
 # Storefront segment used in the catalog search URL; defaults to "us".
 storefront = os.getenv("AM_STOREFRONT", "us")


 def normalize_string(s):
    """
    Reduce a title or artist name to a canonical form for fuzzy comparison.

    The text is lower-cased, then stripped of parenthesised/bracketed notes,
    featured-artist credits ("feat.", "ft.", "featuring", "with" and
    everything after them), dash-separated edition suffixes such as
    "- Remastered 2011", and punctuation.

    Args:
        s: Raw title or artist string; may be None or empty.

    Returns:
        The normalized text, single-spaced and trimmed. "" when s is falsy
        or contains no word characters.
    """
    if not s:
        return ""

    def squash(text):
        # Punctuation becomes a space, then whitespace runs collapse to one.
        text = re.sub(r"[^\w\s]", " ", text)
        return re.sub(r"\s+", " ", text).strip()

    lowered = s.lower()

    # Drop "(...)" and "[...]" annotations such as "(Live)" or "[Deluxe]".
    text = re.sub(r"\s*\(.*?\)\s*", " ", lowered)
    text = re.sub(r"\s*\[.*?\]\s*", " ", text)

    # Drop a credit marker and everything after it. The marker must be a
    # whole word preceded by whitespace: without that, "ft" matched inside
    # "soft kitty" (leaving "so") and a title starting with "With ..." was
    # erased completely.
    # NOTE: "with" in the middle of a title ("Stay With Me") is still cut;
    # both sides of a comparison are normalized the same way.
    text = re.sub(r"\s+(?:feat\.?|ft\.?|featuring|with)\s+.*", " ", text)

    # Drop edition suffixes that follow a dash.
    text = re.sub(
        r"\s*-\s*(remaster|remix|edit|version|explicit|clean).*", " ", text
    )

    # If stripping removed everything (e.g. "(Intro)"), fall back to the
    # punctuation-stripped original so distinct inputs do not all become "".
    return squash(text) or squash(lowered)


 def similarity_score(s1, s2):
    """
    Return the difflib similarity ratio of two strings.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        float: SequenceMatcher ratio between 0.0 and 1.0; 1.0 means the
        strings are identical.
    """
    matcher = SequenceMatcher(a=s1, b=s2)
    return matcher.ratio()


 def matches_song(yt_title, yt_artist, am_title, am_artist, threshold=0.8):
    """
    Decide whether a YouTube Music track and an Apple Music track are the
    same song, using normalized text and fuzzy similarity.

    Args:
        yt_title: YouTube Music song title
        yt_artist: YouTube Music artist name
        am_title: Apple Music song title
        am_artist: Apple Music artist name
        threshold: Minimum similarity score (0-1) that both title and artist
            must reach for a plain match

    Returns:
        bool: True if the songs likely match. Always False when either title
        is empty, because there is then nothing to compare.
    """

    def comparable(text):
        # If normalization strips everything, compare the raw text instead,
        # so two unrelated strings never look equal just because both
        # collapsed to "".
        return normalize_string(text) or (text or "").strip().lower()

    yt_title_norm = comparable(yt_title)
    yt_artist_norm = comparable(yt_artist)
    am_title_norm = comparable(am_title)
    am_artist_norm = comparable(am_artist)

    if not yt_title_norm or not am_title_norm:
        return False

    # Exact match after normalization.
    if yt_title_norm == am_title_norm and yt_artist_norm == am_artist_norm:
        return True

    title_similarity = similarity_score(yt_title_norm, am_title_norm)
    artist_similarity = similarity_score(yt_artist_norm, am_artist_norm)

    # Both title and artist meet the threshold.
    if title_similarity >= threshold and artist_similarity >= threshold:
        return True

    # More lenient: if one side is very close, the other may be looser.
    if title_similarity >= 0.9 and artist_similarity >= 0.6:
        return True
    if artist_similarity >= 0.9 and title_similarity >= 0.6:
        return True

    # One artist string contains the other ("Artist" vs "Artist, Other
    # Artist"). Both must be non-empty: "" is a substring of every string,
    # so a missing artist would otherwise match anything.
    artists_overlap = bool(yt_artist_norm and am_artist_norm) and (
        yt_artist_norm in am_artist_norm or am_artist_norm in yt_artist_norm
    )
    return artists_overlap and title_similarity >= threshold


 def am_search_song(title, artist, want_explicit=False):
    """
    Return an Apple Music catalog song id that matches <title> <artist>.

    Searches the catalog of the configured storefront, keeps only hits that
    pass matches_song, and prefers a hit whose explicit/clean rating agrees
    with want_explicit.

    Args:
        title: Song title to look for.
        artist: Artist name to look for.
        want_explicit: True to prefer an explicit version, False to prefer a
            clean one.

    Returns:
        The id of the chosen song, or None when no hit matches.

    Raises:
        requests.HTTPError: if the search request returns an error status.
    """
    response = session.get(
        f"{AM_BASE}/v1/catalog/{storefront}/search",
        params={
            "term": f"{title} - {artist}",
            "types": "songs",
            "limit": 25,
        },
        timeout=30,
    )
    # Without this check an error response (expired token, rate limit) has no
    # "results" key and every song would be reported as "not found".
    response.raise_for_status()
    hits = response.json().get("results", {}).get("songs", {}).get("data", [])

    # Keep only the hits that actually match the requested title and artist.
    matching_hits = []
    for song in hits:
        attributes = song["attributes"]
        am_title = attributes.get("name", "")
        am_artist = attributes.get("artistName", "")
        if matches_song(title, artist, am_title, am_artist):
            matching_hits.append(song)

    if not matching_hits:
        return None

    # First match whose rating agrees with what the caller asked for.
    for song in matching_hits:
        is_explicit = song["attributes"].get("contentRating") == "explicit"
        if is_explicit == bool(want_explicit):
            return song["id"]

    # Fallback: give the first matching result regardless of rating.
    return matching_hits[0]["id"]


 def am_create_playlist(
    name, description="Imported from YouTube Music", artwork_url=None
 ):
    """
    Create an empty playlist in the user's Apple Music library.

    Args:
        name: Playlist name.
        description: Playlist description.
        artwork_url: Optional image URL. The image is downloaded and attached
            to the request as a base64 data URL; any download problem only
            prints a warning and the playlist is created without artwork.

    Returns:
        The id of the newly created library playlist.

    Raises:
        requests.HTTPError: if the creation request returns an error status.
    """
    import base64

    payload = {
        "attributes": {"name": name, "description": description},
        "relationships": {"tracks": {"data": []}},
    }

    # Artwork is best-effort: failures are reported but never abort creation.
    # NOTE(review): confirm that Apple's playlist creation request honours an
    # "artwork" attribute at all.
    if artwork_url:
        try:
            img_response = requests.get(artwork_url, timeout=30)
        except requests.RequestException as e:
            print(f"  • Warning: Could not set playlist artwork: {e}")
        else:
            if img_response.status_code == 200:
                # Label the data URL with the type the server reported rather
                # than assuming JPEG; fall back to JPEG if it is not an image.
                content_type = img_response.headers.get("Content-Type", "")
                mime = content_type.split(";")[0].strip()
                if not mime.startswith("image/"):
                    mime = "image/jpeg"
                img_data = base64.b64encode(img_response.content).decode("utf-8")
                payload["attributes"]["artwork"] = {
                    "url": f"data:{mime};base64,{img_data}"
                }
            else:
                print(
                    "  • Warning: Could not set playlist artwork: "
                    f"HTTP {img_response.status_code}"
                )

    r = session.post(
        f"{AM_BASE}/v1/me/library/playlists", json=payload, timeout=30
    )
    r.raise_for_status()
    return r.json()["data"][0]["id"]


 def am_add_tracks(playlist_id, song_ids):
    """
    Append catalog songs to a library playlist, 100 ids per request.

    Args:
        playlist_id: Id of the target library playlist.
        song_ids: Sequence of Apple Music song ids to add, in order.

    Raises:
        requests.HTTPError: if any batch request returns an error status.
    """
    batch_size = 100
    endpoint = (
        f"https://api.music.apple.com/v1/me/library/playlists/"
        f"{playlist_id}/tracks"
    )
    for start in range(0, len(song_ids), batch_size):
        chunk = song_ids[start : start + batch_size]
        body = {"data": [{"id": sid, "type": "songs"} for sid in chunk]}
        response = session.post(endpoint, json=body)
        response.raise_for_status()


 # ── 2. migrate every playlist ────────────────────────────────────────────────
 # limit=None: by default ytmusicapi returns only the first 25 library
 # playlists, which would silently skip the rest.
 for pl in yt.get_library_playlists(limit=None):
    print(f"\n▶  {pl['title']}")

    # Fetch the tracks before touching Apple Music, so a YouTube failure does
    # not leave an empty playlist behind. limit=None makes ytmusicapi return
    # every track, so no manual continuation handling is needed.
    playlist_data = yt.get_playlist(pl["playlistId"], limit=None)
    yt_tracks = playlist_data.get("tracks") or []

    # Use the last thumbnail as artwork (last item is usually highest quality).
    thumbnails = pl.get("thumbnails") or []
    artwork_url = thumbnails[-1].get("url") if thumbnails else None

    apple_id = am_create_playlist(pl["title"], artwork_url=artwork_url)

    apple_ids = []
    not_found = []
    print(f"  • {len(yt_tracks)} tracks found")

    for t in tqdm(yt_tracks, unit="song", desc="Matching songs"):
        title = t.get("title") or ""
        artists_list = t.get("artists") or []
        # Only the first listed artist is used for matching.
        artist = (artists_list[0].get("name") or "") if artists_list else ""
        explicit = t.get("isExplicit", False)

        # Skip tracks without essential info
        if not title or not artist:
            not_found.append(f"  • skipped: {title} – {artist} (missing info)")
            continue

        song_id = am_search_song(title, artist, explicit)
        if song_id:
            apple_ids.append(song_id)
        else:
            not_found.append(
                f"  • not found: {title} – {artist} "
                f"({'E' if explicit else 'clean'})"
            )

    # Print all not found songs at once, after the progress bar has finished.
    if not_found:
        print("\n".join(not_found))

    am_add_tracks(apple_id, apple_ids)
    print(f"  ✓ {len(apple_ids)}/{len(yt_tracks)} tracks transferred")

 print("\nAll playlists migrated!")
	#!/usr/bin/env python3
	"""
	Very small YouTube Music → Apple Music playlist migrator.

	pip install ytmusicapi tqdm requests python-dotenv pyjwt cryptography
	"""

	import os, json, time, urllib.parse, requests
	from ytmusicapi import OAuthCredentials, YTMusic # YouTube Music helper
	from tqdm import tqdm # progress bars
	from dotenv import load_dotenv # read .env with your secrets
	import jwt # for Apple Music JWT token generation
	import re
	from difflib import SequenceMatcher

	# Load secrets from a local .env file. This section reads YT_CLIENT_ID,
	# YT_CLIENT_SECRET, AM_PRIVATE_KEY, AM_KEY_ID, AM_TEAM_ID and AM_USER_TOKEN,
	# plus the optional AM_STOREFRONT.
	load_dotenv()

	# ---- 1. log in to both services -------------------------------------------

	# os.getenv returns None for an unset variable, so a missing value is not
	# reported here; it only surfaces when the clients below try to use it.
	yt_client_id = os.getenv("YT_CLIENT_ID")
	yt_client_secret = os.getenv("YT_CLIENT_SECRET")
	am_private_key = os.getenv("AM_PRIVATE_KEY")
	am_key_id = os.getenv("AM_KEY_ID")
	am_team_id = os.getenv("AM_TEAM_ID")

	# YouTube Music client. "oauth.json" is passed as the first argument —
	# presumably the token file written by ytmusicapi's OAuth setup; confirm.
	yt = YTMusic(
	"oauth.json",
	oauth_credentials=OAuthCredentials(
	client_id=yt_client_id, client_secret=yt_client_secret
	),
	)

	# Claims for the JWT sent to Apple Music as the Bearer token: issuer is the
	# team id, issued now, expiring 180 days from now.
	payload = {
	"iss": am_team_id,
	"iat": int(time.time()),
	"exp": int(time.time()) + 60 * 60 * 24 * 180, # 180 days
	}
	# Signed with ES256 using the private key; the key id travels in the JWT
	# header as "kid".
	token = jwt.encode(
	payload, am_private_key, algorithm="ES256", headers={"kid": am_key_id}
	)

	AM_BASE = "https://api.music.apple.com"
	# One shared session so every Apple Music request carries the same headers.
	session = requests.Session()
	session.headers.update(
	{
	"Authorization": f"Bearer {token}",
	# os.environ[...] rather than getenv: an unset AM_USER_TOKEN raises
	# KeyError right here.
	"Music-User-Token": os.environ["AM_USER_TOKEN"],
	"Content-Type": "application/json",
	}
	)
	# Storefront segment used in the catalog search URL; defaults to "us".
	storefront = os.getenv("AM_STOREFRONT", "us")


	def normalize_string(s):
	    """
	    Reduce a title or artist name to a canonical form for fuzzy comparison.

	    The text is lower-cased, then stripped of parenthesised/bracketed notes,
	    featured-artist credits ("feat.", "ft.", "featuring", "with" and
	    everything after them), dash-separated edition suffixes such as
	    "- Remastered 2011", and punctuation.

	    Args:
	        s: Raw title or artist string; may be None or empty.

	    Returns:
	        The normalized text, single-spaced and trimmed. "" when s is falsy
	        or contains no word characters.
	    """
	    if not s:
	        return ""

	    def squash(text):
	        # Punctuation becomes a space, then whitespace runs collapse to one.
	        text = re.sub(r"[^\w\s]", " ", text)
	        return re.sub(r"\s+", " ", text).strip()

	    lowered = s.lower()

	    # Drop "(...)" and "[...]" annotations. The quantifiers are restored
	    # here: the pasted patterns had lost their "*" and matched at most one
	    # character between the brackets.
	    text = re.sub(r"\s*\(.*?\)\s*", " ", lowered)
	    text = re.sub(r"\s*\[.*?\]\s*", " ", text)

	    # Drop a credit marker and everything after it. The marker must be a
	    # whole word preceded by whitespace, so "ft" inside "soft kitty" is not
	    # treated as a credit and a title starting with "With ..." survives.
	    # NOTE: "with" in the middle of a title ("Stay With Me") is still cut;
	    # both sides of a comparison are normalized the same way.
	    text = re.sub(r"\s+(?:feat\.?|ft\.?|featuring|with)\s+.*", " ", text)

	    # Drop edition suffixes that follow a dash. The alternation uses plain
	    # "|"; the pasted "\|" matched a literal pipe character instead.
	    text = re.sub(
	        r"\s*-\s*(remaster|remix|edit|version|explicit|clean).*", " ", text
	    )

	    # If stripping removed everything (e.g. "(Intro)"), fall back to the
	    # punctuation-stripped original so distinct inputs do not all become "".
	    return squash(text) or squash(lowered)


	def similarity_score(s1, s2):
	    """
	    Return the difflib similarity ratio of two strings.

	    Args:
	        s1: First string.
	        s2: Second string.

	    Returns:
	        float: SequenceMatcher ratio between 0.0 and 1.0; 1.0 means the
	        strings are identical.
	    """
	    return SequenceMatcher(None, s1, s2).ratio()


	def matches_song(yt_title, yt_artist, am_title, am_artist, threshold=0.8):
	    """
	    Decide whether a YouTube Music track and an Apple Music track are the
	    same song, using normalized text and fuzzy similarity.

	    Args:
	        yt_title: YouTube Music song title
	        yt_artist: YouTube Music artist name
	        am_title: Apple Music song title
	        am_artist: Apple Music artist name
	        threshold: Minimum similarity score (0-1) that both title and artist
	            must reach for a plain match

	    Returns:
	        bool: True if the songs likely match. Always False when either title
	        is empty, because there is then nothing to compare.
	    """

	    def comparable(text):
	        # If normalization strips everything, compare the raw text instead,
	        # so two unrelated strings never look equal just because both
	        # collapsed to "".
	        return normalize_string(text) or (text or "").strip().lower()

	    yt_title_norm = comparable(yt_title)
	    yt_artist_norm = comparable(yt_artist)
	    am_title_norm = comparable(am_title)
	    am_artist_norm = comparable(am_artist)

	    if not yt_title_norm or not am_title_norm:
	        return False

	    # Exact match after normalization.
	    if yt_title_norm == am_title_norm and yt_artist_norm == am_artist_norm:
	        return True

	    title_similarity = similarity_score(yt_title_norm, am_title_norm)
	    artist_similarity = similarity_score(yt_artist_norm, am_artist_norm)

	    # Both title and artist meet the threshold.
	    if title_similarity >= threshold and artist_similarity >= threshold:
	        return True

	    # More lenient: if one side is very close, the other may be looser.
	    if title_similarity >= 0.9 and artist_similarity >= 0.6:
	        return True
	    if artist_similarity >= 0.9 and title_similarity >= 0.6:
	        return True

	    # One artist string contains the other ("Artist" vs "Artist, Other
	    # Artist"). Both must be non-empty: "" is a substring of every string,
	    # so a missing artist would otherwise match anything.
	    artists_overlap = bool(yt_artist_norm and am_artist_norm) and (
	        yt_artist_norm in am_artist_norm or am_artist_norm in yt_artist_norm
	    )
	    return artists_overlap and title_similarity >= threshold


	def am_search_song(title, artist, want_explicit=False):
	    """
	    Return an Apple Music catalog song id that matches <title> <artist>.

	    Searches the catalog of the configured storefront, keeps only hits that
	    pass matches_song, and prefers a hit whose explicit/clean rating agrees
	    with want_explicit.

	    Args:
	        title: Song title to look for.
	        artist: Artist name to look for.
	        want_explicit: True to prefer an explicit version, False to prefer a
	            clean one.

	    Returns:
	        The id of the chosen song, or None when no hit matches.

	    Raises:
	        requests.HTTPError: if the search request returns an error status.
	    """
	    response = session.get(
	        f"{AM_BASE}/v1/catalog/{storefront}/search",
	        params={
	            "term": f"{title} - {artist}",
	            "types": "songs",
	            "limit": 25,
	        },
	        timeout=30,
	    )
	    # Without this check an error response (expired token, rate limit) has no
	    # "results" key and every song would be reported as "not found".
	    response.raise_for_status()
	    hits = response.json().get("results", {}).get("songs", {}).get("data", [])

	    # Keep only the hits that actually match the requested title and artist.
	    matching_hits = []
	    for song in hits:
	        attributes = song["attributes"]
	        am_title = attributes.get("name", "")
	        am_artist = attributes.get("artistName", "")
	        if matches_song(title, artist, am_title, am_artist):
	            matching_hits.append(song)

	    if not matching_hits:
	        return None

	    # First match whose rating agrees with what the caller asked for.
	    for song in matching_hits:
	        is_explicit = song["attributes"].get("contentRating") == "explicit"
	        if is_explicit == bool(want_explicit):
	            return song["id"]

	    # Fallback: give the first matching result regardless of rating.
	    return matching_hits[0]["id"]


	def am_create_playlist(
	    name, description="Imported from YouTube Music", artwork_url=None
	):
	    """
	    Create an empty playlist in the user's Apple Music library.

	    Args:
	        name: Playlist name.
	        description: Playlist description.
	        artwork_url: Optional image URL. The image is downloaded and attached
	            to the request as a base64 data URL; any download problem only
	            prints a warning and the playlist is created without artwork.

	    Returns:
	        The id of the newly created library playlist.

	    Raises:
	        requests.HTTPError: if the creation request returns an error status.
	    """
	    import base64

	    payload = {
	        "attributes": {"name": name, "description": description},
	        "relationships": {"tracks": {"data": []}},
	    }

	    # Artwork is best-effort: failures are reported but never abort creation.
	    # NOTE(review): confirm that Apple's playlist creation request honours an
	    # "artwork" attribute at all.
	    if artwork_url:
	        try:
	            img_response = requests.get(artwork_url, timeout=30)
	        except requests.RequestException as e:
	            print(f" • Warning: Could not set playlist artwork: {e}")
	        else:
	            if img_response.status_code == 200:
	                # Label the data URL with the type the server reported rather
	                # than assuming JPEG; fall back to JPEG if it is not an image.
	                content_type = img_response.headers.get("Content-Type", "")
	                mime = content_type.split(";")[0].strip()
	                if not mime.startswith("image/"):
	                    mime = "image/jpeg"
	                img_data = base64.b64encode(img_response.content).decode("utf-8")
	                payload["attributes"]["artwork"] = {
	                    "url": f"data:{mime};base64,{img_data}"
	                }
	            else:
	                print(
	                    " • Warning: Could not set playlist artwork: "
	                    f"HTTP {img_response.status_code}"
	                )

	    r = session.post(
	        f"{AM_BASE}/v1/me/library/playlists", json=payload, timeout=30
	    )
	    r.raise_for_status()
	    return r.json()["data"][0]["id"]


	def am_add_tracks(playlist_id, song_ids):
	    """
	    Append catalog songs to a library playlist, 100 ids per request.

	    Args:
	        playlist_id: Id of the target library playlist.
	        song_ids: Sequence of Apple Music song ids to add, in order.

	    Raises:
	        requests.HTTPError: if any batch request returns an error status.
	    """
	    for start in range(0, len(song_ids), 100):
	        batch = song_ids[start : start + 100]
	        payload = {"data": [{"id": sid, "type": "songs"} for sid in batch]}
	        session.post(
	            f"https://api.music.apple.com/v1/me/library/playlists/"
	            f"{playlist_id}/tracks",
	            json=payload,
	        ).raise_for_status()


	# ── 2. migrate every playlist ────────────────────────────────────────────────
	# limit=None: by default ytmusicapi returns only the first 25 library
	# playlists, which would silently skip the rest.
	for pl in yt.get_library_playlists(limit=None):
	    print(f"\n▶ {pl['title']}")

	    # Fetch the tracks before touching Apple Music, so a YouTube failure does
	    # not leave an empty playlist behind. limit=None makes ytmusicapi return
	    # every track, so no manual continuation handling is needed.
	    playlist_data = yt.get_playlist(pl["playlistId"], limit=None)
	    yt_tracks = playlist_data.get("tracks") or []

	    # Use the last thumbnail as artwork (last item is usually highest quality).
	    thumbnails = pl.get("thumbnails") or []
	    artwork_url = thumbnails[-1].get("url") if thumbnails else None

	    apple_id = am_create_playlist(pl["title"], artwork_url=artwork_url)

	    apple_ids = []
	    not_found = []
	    print(f" • {len(yt_tracks)} tracks found")

	    for t in tqdm(yt_tracks, unit="song", desc="Matching songs"):
	        title = t.get("title") or ""
	        artists_list = t.get("artists") or []
	        # Only the first listed artist is used for matching.
	        artist = (artists_list[0].get("name") or "") if artists_list else ""
	        explicit = t.get("isExplicit", False)

	        # Skip tracks without essential info
	        if not title or not artist:
	            not_found.append(f" • skipped: {title} – {artist} (missing info)")
	            continue

	        song_id = am_search_song(title, artist, explicit)
	        if song_id:
	            apple_ids.append(song_id)
	        else:
	            not_found.append(
	                f" • not found: {title} – {artist} "
	                f"({'E' if explicit else 'clean'})"
	            )

	    # Print all not found songs at once, after the progress bar has finished.
	    if not_found:
	        print("\n".join(not_found))

	    am_add_tracks(apple_id, apple_ids)
	    print(f" ✓ {len(apple_ids)}/{len(yt_tracks)} tracks transferred")

	print("\nAll playlists migrated!")