redraw · May 13, 2025 14:45
diff --git a/mixcloud.py b/mixcloud.py
 # /// script
 # requires-python = ">=3.12"
 # dependencies = [
 #     "aiofiles",
 #     "httpx",
 #     "tqdm",
 # ]
 # ///
 import sys
 import json
 import httpx
 import asyncio
 import argparse
 import aiofiles
 import traceback
 from urllib.parse import quote_plus
 from tqdm.asyncio import tqdm


 async def _get_user_id(client, username):
    """Look up the GraphQL id of the Mixcloud user named ``username``.

    Posts the ``userQuery`` document to ``https://app.mixcloud.com/graphql``
    and reads ``data.user.id`` from the JSON reply.

    Args:
        client: Object exposing an awaitable ``post(url, json=...)``.
        username: Username placed in the query's ``lookup`` variable.

    Returns:
        The ``id`` field of the user object in the response.

    Raises:
        ValueError: If the response carries no user object.
        Exception: Whatever ``response.raise_for_status()`` raises.
    """
    payload = {
        "query": "query userQuery(\n  $lookup: UserLookup!\n) {\n  user: userLookup(lookup: $lookup) {\n    id\n    username\n    isSelect\n    shouldShowAds\n    ...UserHeadTags_user\n  }\n}\n\nfragment UserHeadTags_user on User {\n  displayName\n  isUploader\n  username\n  picture {\n    urlRoot\n  }\n  twitterAccount {\n    username\n  }\n}\n",
        "variables": {
            "lookup": {
                "username": username,
            },
        },
    }

    response = await client.post("https://app.mixcloud.com/graphql", json=payload)
    response.raise_for_status()

    # A missing/null user would otherwise surface as an opaque TypeError
    # ("'NoneType' object is not subscriptable"); fail with a clear message.
    user = (response.json().get("data") or {}).get("user")
    if not user:
        raise ValueError(f"Mixcloud user not found: {username!r}")
    return user["id"]


 async def _get_cloudcast_ids(client, user_id, cursor=None, max_results=None):
    """Collect the slugs of a user's public uploads by paging the GraphQL API.

    Each request asks for 20 uploads ordered by ``LATEST`` and continues from
    the ``endCursor`` of the previous page. One status line per request is
    printed to stderr.

    Args:
        client: Object exposing an awaitable ``post(url, json=...)``.
        user_id: Node id sent as the query's ``id`` variable.
        cursor: Cursor to start from; ``None`` starts at the first page.
        max_results: Maximum number of slugs to return; ``None`` means all.

    Returns:
        List of upload slugs, truncated to ``max_results`` when it is given.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises.
    """
    # Loop-invariant query text, built once instead of on every page.
    query = "query UserUploadsPagePaginationQuery(\n  $audioTypes: [AudioTypeEnum] = [SHOW]\n  $count: Int = 10\n  $cursor: String\n  $orderBy: CloudcastOrderByEnum = LATEST\n  $id: ID!\n) {\n  node(id: $id) {\n    __typename\n    ...UserUploadsPage_user_3E72Mc\n    id\n  }\n}\n\nfragment UserUploadsPage_user_3E72Mc on User {\n  id\n  displayName\n  username\n  isViewer\n  uploads(first: $count, isPublic: true, after: $cursor, orderBy: $orderBy, audioTypes: $audioTypes) {\n    edges {\n      node {\n        id\n        slug\n__typename\n      }\n      cursor\n    }\n    pageInfo {\n      endCursor\n      hasNextPage\n    }\n  }\n}"
    ids = []

    while True:
        payload = {
            "query": query,
            "variables": {
                "audioTypes": [
                    "SHOW",
                ],
                "count": 20,
                "cursor": cursor,
                "orderBy": "LATEST",
                "id": user_id,
            },
        }

        response = await client.post("https://app.mixcloud.com/graphql", json=payload)
        print(f"[{response.status_code}] {cursor=} {response.url}", file=sys.stderr)
        response.raise_for_status()

        # Decode the body once and reuse it for both slugs and page info.
        uploads = response.json()["data"]["node"]["uploads"]
        ids.extend(edge["node"]["slug"] for edge in uploads["edges"])

        page_info = uploads["pageInfo"]
        cursor = page_info["endCursor"]

        # Stop when the server reports no further page (or gives no cursor),
        # avoiding a trailing request that would only return an empty page.
        if not cursor or not page_info.get("hasNextPage", True):
            break
        # ">=" so that hitting the limit exactly does not fetch another page;
        # "is not None" so a limit of 0 does not page through everything.
        if max_results is not None and len(ids) >= max_results:
            break

    return ids[:max_results]


 async def _get_cloudcast_sections(client, username, slug):
    """Fetch one upload's sections and return a record per track section.

    Posts ``TracklistAudioPageQuery`` for the ``username``/``slug`` pair and
    keeps only sections whose ``__typename`` is ``"TrackSection"``.

    Args:
        client: Object exposing an awaitable ``post(url, json=...)``.
        username: Owner of the upload.
        slug: Slug of the upload.

    Returns:
        A list of dicts with the keys ``url``, ``name``, ``date``, ``slug``,
        ``artist``, ``song`` and ``youtube`` (a YouTube search URL for
        "artist song").

    Raises:
        Exception: With the first GraphQL error message when the response's
            ``data`` is empty; also whatever ``raise_for_status()`` raises.
    """
    lookup = {"username": username, "slug": slug}
    payload = {
        "query": """
 query TracklistAudioPageQuery(
  $lookup: CloudcastLookup!
 ) {
  cloudcast: cloudcastLookup(lookup: $lookup) {
    name
    publishDate
    canShowTracklist
    featuringArtistList
    moreFeaturingArtists
    sections {
      __typename
      ... on TrackSection {
        __typename
        artistName
        songName
      }
      ... on ChapterSection {
        chapter
      }
      ... on Node {
        __isNode: __typename
        id
      }
    }
    id
  }
 }
        """,
        "variables": {"lookup": lookup},
    }

    response = await client.post("https://app.mixcloud.com/graphql", json=payload)
    response.raise_for_status()
    body = response.json()

    if not body["data"]:
        raise Exception(body["errors"][0]["message"])

    cloudcast = body["data"]["cloudcast"]
    tracks = []
    for section in cloudcast["sections"]:
        # Chapter and other section types carry no artist/song pair.
        if section["__typename"] != "TrackSection":
            continue
        tracks.append(
            {
                "url": f"https://www.mixcloud.com/{username}/{slug}/",
                "name": cloudcast["name"],
                "date": cloudcast["publishDate"],
                "slug": slug,
                "artist": section["artistName"],
                "song": section["songName"],
                "youtube": f"https://www.youtube.com/results?search_query={quote_plus(section['artistName'] + ' ' + section['songName'])}",
            }
        )
    return tracks


 async def query_songs_from_uploads(client, username=None, max_results=None):
    """Yield the track records of each of a user's uploads as they complete.

    Resolves the user id, lists the upload slugs (capped by ``max_results``),
    then requests every upload's sections concurrently and yields each result
    list in completion order, with a tqdm progress bar labelled "Downloading".

    Args:
        client: HTTP client passed through to the request helpers.
        username: Mixcloud username whose uploads are scanned.
        max_results: Maximum number of uploads to process; ``None`` means all.

    Yields:
        The list returned by ``_get_cloudcast_sections`` for one upload.
        Uploads whose request fails are skipped after their traceback is
        printed to stderr.
    """
    user_id = await _get_user_id(client, username)
    cloudcast_ids = await _get_cloudcast_ids(client, user_id, max_results=max_results)
    tasks = [_get_cloudcast_sections(client, username, slug) for slug in cloudcast_ids]
    for pending in tqdm(
        asyncio.as_completed(tasks), total=len(tasks), desc="Downloading"
    ):
        # Only the download is guarded; the yield sits outside the try so an
        # exception thrown into the generator is not mistaken for (and
        # swallowed as) a failed download.
        try:
            sections = await pending
        except Exception:
            # Best effort: report the failure and continue with other uploads.
            traceback.print_exc(file=sys.stderr)
        else:
            yield sections


 def cli(username: str, cookie: str, max_results: int):
    """Append the track lists of ``username``'s uploads to ``<username>.jsonl``.

    Builds the request headers (including the supplied cookie), then runs an
    asyncio event loop that writes one JSON document per line, one line per
    upload yielded by ``query_songs_from_uploads``.

    Args:
        username: Mixcloud username; also the stem of the output file name.
        cookie: Value sent as the ``cookie`` request header.
        max_results: Forwarded to ``query_songs_from_uploads``.
    """
    headers = {
        "x-mixcloud-client-version": "6fefb6248869200e9a6d8974360c122e0b52fe2c",
        "x-mixcloud-platform": "www",
        "x-requested-with": "XMLHttpRequest",
        "origin": "https://www.mixcloud.com",
        "referer": "https://www.mixcloud.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0",
    }
    headers["cookie"] = cookie

    async def _export():
        # The client is opened first and closed last, as with nested blocks.
        async with (
            httpx.AsyncClient(headers=headers, timeout=300) as client,
            aiofiles.open(f"{username}.jsonl", "a") as out,
        ):
            songs = query_songs_from_uploads(
                client, username=username, max_results=max_results
            )
            async for track_list in songs:
                await out.write(json.dumps(track_list))
                await out.write("\n")

    asyncio.run(_export())


 if __name__ == "__main__":
    # Script entry point: parse -u/--username, -c/--cookie and the optional
    # -m/--max-results, then hand them to cli().
    parser = argparse.ArgumentParser()
    for flags, options in (
        (
            ("-u", "--username"),
            {"required": True, "help": "Username of the user to extract songs from"},
        ),
        (
            ("-c", "--cookie"),
            {
                "required": True,
                "help": "Cookie value from browser. This contains CSRF token, and login cookie. Mixcloud requires logged user to return tracks.",
            },
        ),
        (
            ("-m", "--max-results"),
            {"type": int, "help": "Max uploads to extract songs from. Defaults to all."},
        ),
    ):
        parser.add_argument(*flags, **options)
    args = parser.parse_args()
    cli(args.username, args.cookie, args.max_results)
	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "aiofiles",
	# "httpx",
	# "tqdm",
	# ]
	# ///
	import sys
	import json
	import httpx
	import asyncio
	import argparse
	import aiofiles
	import traceback
	from urllib.parse import quote_plus
	from tqdm.asyncio import tqdm


	async def _get_user_id(client, username):
	    """Look up the GraphQL id of the Mixcloud user named ``username``.

	    Posts the ``userQuery`` document to ``https://app.mixcloud.com/graphql``
	    and reads ``data.user.id`` from the JSON reply.

	    Args:
	        client: Object exposing an awaitable ``post(url, json=...)``.
	        username: Username placed in the query's ``lookup`` variable.

	    Returns:
	        The ``id`` field of the user object in the response.

	    Raises:
	        ValueError: If the response carries no user object.
	        Exception: Whatever ``response.raise_for_status()`` raises.
	    """
	    payload = {
	        "query": "query userQuery(\n $lookup: UserLookup!\n) {\n user: userLookup(lookup: $lookup) {\n id\n username\n isSelect\n shouldShowAds\n ...UserHeadTags_user\n }\n}\n\nfragment UserHeadTags_user on User {\n displayName\n isUploader\n username\n picture {\n urlRoot\n }\n twitterAccount {\n username\n }\n}\n",
	        "variables": {
	            "lookup": {
	                "username": username,
	            },
	        },
	    }

	    response = await client.post("https://app.mixcloud.com/graphql", json=payload)
	    response.raise_for_status()

	    # A missing/null user would otherwise surface as an opaque TypeError;
	    # fail with a message that names the username instead.
	    user = (response.json().get("data") or {}).get("user")
	    if not user:
	        raise ValueError(f"Mixcloud user not found: {username!r}")
	    return user["id"]


	async def _get_cloudcast_ids(client, user_id, cursor=None, max_results=None):
	    """Collect the slugs of a user's public uploads by paging the GraphQL API.

	    Each request asks for 20 uploads ordered by ``LATEST`` and continues from
	    the ``endCursor`` of the previous page. One status line per request is
	    printed to stderr.

	    Args:
	        client: Object exposing an awaitable ``post(url, json=...)``.
	        user_id: Node id sent as the query's ``id`` variable.
	        cursor: Cursor to start from; ``None`` starts at the first page.
	        max_results: Maximum number of slugs to return; ``None`` means all.

	    Returns:
	        List of upload slugs, truncated to ``max_results`` when it is given.

	    Raises:
	        Exception: Whatever ``response.raise_for_status()`` raises.
	    """
	    # Loop-invariant query text, built once instead of on every page.
	    query = "query UserUploadsPagePaginationQuery(\n $audioTypes: [AudioTypeEnum] = [SHOW]\n $count: Int = 10\n $cursor: String\n $orderBy: CloudcastOrderByEnum = LATEST\n $id: ID!\n) {\n node(id: $id) {\n __typename\n ...UserUploadsPage_user_3E72Mc\n id\n }\n}\n\nfragment UserUploadsPage_user_3E72Mc on User {\n id\n displayName\n username\n isViewer\n uploads(first: $count, isPublic: true, after: $cursor, orderBy: $orderBy, audioTypes: $audioTypes) {\n edges {\n node {\n id\n slug\n__typename\n }\n cursor\n }\n pageInfo {\n endCursor\n hasNextPage\n }\n }\n}"
	    ids = []

	    while True:
	        payload = {
	            "query": query,
	            "variables": {
	                "audioTypes": [
	                    "SHOW",
	                ],
	                "count": 20,
	                "cursor": cursor,
	                "orderBy": "LATEST",
	                "id": user_id,
	            },
	        }

	        response = await client.post("https://app.mixcloud.com/graphql", json=payload)
	        print(f"[{response.status_code}] {cursor=} {response.url}", file=sys.stderr)
	        response.raise_for_status()

	        # Decode the body once and reuse it for both slugs and page info.
	        uploads = response.json()["data"]["node"]["uploads"]
	        ids.extend(edge["node"]["slug"] for edge in uploads["edges"])

	        page_info = uploads["pageInfo"]
	        cursor = page_info["endCursor"]

	        # Stop when the server reports no further page (or gives no cursor),
	        # avoiding a trailing request that would only return an empty page.
	        if not cursor or not page_info.get("hasNextPage", True):
	            break
	        # ">=" so that hitting the limit exactly does not fetch another page;
	        # "is not None" so a limit of 0 does not page through everything.
	        if max_results is not None and len(ids) >= max_results:
	            break

	    return ids[:max_results]


	async def _get_cloudcast_sections(client, username, slug):
	    """Fetch one upload's sections and return a record per track section.

	    Posts ``TracklistAudioPageQuery`` for the ``username``/``slug`` pair and
	    keeps only sections whose ``__typename`` is ``"TrackSection"``.

	    Args:
	        client: Object exposing an awaitable ``post(url, json=...)``.
	        username: Owner of the upload.
	        slug: Slug of the upload.

	    Returns:
	        A list of dicts with the keys ``url``, ``name``, ``date``, ``slug``,
	        ``artist``, ``song`` and ``youtube`` (a YouTube search URL for
	        "artist song").

	    Raises:
	        Exception: With the first GraphQL error message when the response's
	            ``data`` is empty; also whatever ``raise_for_status()`` raises.
	    """
	    payload = {
	        "query": """
	query TracklistAudioPageQuery(
	$lookup: CloudcastLookup!
	) {
	cloudcast: cloudcastLookup(lookup: $lookup) {
	name
	publishDate
	canShowTracklist
	featuringArtistList
	moreFeaturingArtists
	sections {
	__typename
	... on TrackSection {
	__typename
	artistName
	songName
	}
	... on ChapterSection {
	chapter
	}
	... on Node {
	__isNode: __typename
	id
	}
	}
	id
	}
	}
	""",
	        "variables": {
	            "lookup": {
	                "username": username,
	                "slug": slug,
	            }
	        },
	    }

	    response = await client.post("https://app.mixcloud.com/graphql", json=payload)
	    response.raise_for_status()
	    data = response.json()

	    if not data["data"]:
	        raise Exception(data["errors"][0]["message"])

	    return [
	        {
	            "url": f"https://www.mixcloud.com/{username}/{slug}/",
	            "name": data["data"]["cloudcast"]["name"],
	            "date": data["data"]["cloudcast"]["publishDate"],
	            "slug": slug,
	            "artist": section["artistName"],
	            "song": section["songName"],
	            "youtube": f"https://www.youtube.com/results?search_query={quote_plus(section['artistName'] + ' ' + section['songName'])}",
	        }
	        for section in data["data"]["cloudcast"]["sections"]
	        # Chapter and other section types carry no artist/song pair.
	        if section["__typename"] == "TrackSection"
	    ]


	async def query_songs_from_uploads(client, username=None, max_results=None):
	    """Yield the track records of each of a user's uploads as they complete.

	    Resolves the user id, lists the upload slugs (capped by ``max_results``),
	    then requests every upload's sections concurrently and yields each result
	    list in completion order, with a tqdm progress bar labelled "Downloading".

	    Args:
	        client: HTTP client passed through to the request helpers.
	        username: Mixcloud username whose uploads are scanned.
	        max_results: Maximum number of uploads to process; ``None`` means all.

	    Yields:
	        The list returned by ``_get_cloudcast_sections`` for one upload.
	        Uploads whose request fails are skipped after their traceback is
	        printed to stderr.
	    """
	    user_id = await _get_user_id(client, username)
	    cloudcast_ids = await _get_cloudcast_ids(client, user_id, max_results=max_results)
	    tasks = [_get_cloudcast_sections(client, username, slug) for slug in cloudcast_ids]
	    for pending in tqdm(
	        asyncio.as_completed(tasks), total=len(tasks), desc="Downloading"
	    ):
	        # Only the download is guarded; the yield sits outside the try so an
	        # exception thrown into the generator is not mistaken for (and
	        # swallowed as) a failed download.
	        try:
	            sections = await pending
	        except Exception:
	            # Best effort: report the failure and continue with other uploads.
	            traceback.print_exc(file=sys.stderr)
	        else:
	            yield sections


	def cli(username: str, cookie: str, max_results: int):
	    """Append the track lists of ``username``'s uploads to ``<username>.jsonl``.

	    Builds the request headers (including the supplied cookie), then runs an
	    asyncio event loop that writes one JSON document per line, one line per
	    upload yielded by ``query_songs_from_uploads``.

	    Args:
	        username: Mixcloud username; also the stem of the output file name.
	        cookie: Value sent as the ``cookie`` request header.
	        max_results: Forwarded to ``query_songs_from_uploads``.
	    """
	    headers = {
	        "x-mixcloud-client-version": "6fefb6248869200e9a6d8974360c122e0b52fe2c",
	        "x-mixcloud-platform": "www",
	        "x-requested-with": "XMLHttpRequest",
	        "origin": "https://www.mixcloud.com",
	        "referer": "https://www.mixcloud.com/",
	        "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0",
	        "cookie": cookie,
	    }

	    async def run():
	        async with httpx.AsyncClient(headers=headers, timeout=300) as client:
	            # Opened in append mode, so repeated runs add to the same file.
	            async with aiofiles.open(f"{username}.jsonl", "a") as f:
	                async for sections in query_songs_from_uploads(
	                    client, username=username, max_results=max_results
	                ):
	                    await f.write(json.dumps(sections))
	                    await f.write("\n")

	    asyncio.run(run())


	if __name__ == "__main__":
	    # Script entry point: parse -u/--username, -c/--cookie and the optional
	    # -m/--max-results, then hand them to cli().
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "-u",
	        "--username",
	        required=True,
	        help="Username of the user to extract songs from",
	    )
	    parser.add_argument(
	        "-c",
	        "--cookie",
	        required=True,
	        help="Cookie value from browser. This contains CSRF token, and login cookie. Mixcloud requires logged user to return tracks.",
	    )
	    parser.add_argument(
	        "-m",
	        "--max-results",
	        type=int,
	        help="Max uploads to extract songs from. Defaults to all.",
	    )
	    args = parser.parse_args()
	    cli(args.username, args.cookie, args.max_results)