Created
May 18, 2026 21:34
-
-
Save yucer/ff93237bf6d167847ce6ecfca5156eaf to your computer and use it in GitHub Desktop.
Export github star lists
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Export GitHub star lists to markdown files using browser session cookies. | |
| Usage: | |
| python3 export_github_lists.py [username] [output_dir] | |
| Authentication (pick one): | |
| GITHUB_COOKIES="<full cookie string>" python3 export_github_lists.py | |
| GITHUB_TOKEN="ghp_..." python3 export_github_lists.py | |
| """ | |
| import sys | |
| import os | |
| import json | |
| import re | |
| import urllib.request | |
| import urllib.error | |
# Optional positional CLI arguments: [username] [output_dir].
USERNAME = "yucer" if len(sys.argv) <= 1 else sys.argv[1]
OUTPUT_DIR = "./github_lists" if len(sys.argv) <= 2 else sys.argv[2]

# Credentials come from the environment; an unset variable becomes "".
COOKIE_STRING = os.getenv("GITHUB_COOKIES", "")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")

# GraphQL endpoints on the API host and on the github.com web host.
GRAPHQL_PUBLIC = "https://api.github.com/graphql"
GRAPHQL_WEB = "https://github.com/graphql"
| # --------------------------------------------------------------------------- | |
| # HTTP helpers | |
| # --------------------------------------------------------------------------- | |
# Browser-like User-Agent header value used by the HTTP helpers.
UA = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/147.0.0.0 Safari/537.36"
)
def _get(url, extra=None, timeout=30):
    """Fetch *url* with a GET request and return the response body as text.

    Args:
        url: Absolute URL to request.
        extra: Optional mapping of additional headers; on a key collision
            these override the defaults built here.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The body decoded as UTF-8. Undecodable bytes are replaced instead of
        raising.

    Raises:
        urllib.error.URLError: On connection failures; ``HTTPError`` (a
            subclass) on HTTP error statuses. A timeout may also surface as
            ``TimeoutError``.
    """
    headers = {"User-Agent": UA, "Accept": "text/html,application/xhtml+xml,*/*"}
    if COOKIE_STRING:
        headers["Cookie"] = COOKIE_STRING
    if extra:
        headers.update(extra)
    req = urllib.request.Request(url, headers=headers)
    # Without an explicit timeout a stalled connection blocks the script
    # indefinitely.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return resp.read().decode("utf-8", errors="replace")
def _post_json(url, payload, extra=None, timeout=30):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    The ``Cookie`` header is sent when ``COOKIE_STRING`` is set and a bearer
    ``Authorization`` header when ``GITHUB_TOKEN`` is set.

    Args:
        url: Absolute URL to post to.
        payload: JSON-serializable object used as the request body.
        extra: Optional mapping of additional headers; on a key collision
            these override the defaults built here.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: On connection failures; ``HTTPError`` (a
            subclass) on HTTP error statuses. A timeout may also surface as
            ``TimeoutError``.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    headers = {
        "User-Agent": UA,
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    if COOKIE_STRING:
        headers["Cookie"] = COOKIE_STRING
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    if extra:
        headers.update(extra)
    body = json.dumps(payload).encode()
    req = urllib.request.Request(url, data=body, headers=headers)
    # Without an explicit timeout a stalled connection blocks the script
    # indefinitely.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())
| # --------------------------------------------------------------------------- | |
| # Strategy A: public GraphQL API with PAT | |
| # --------------------------------------------------------------------------- | |
# GraphQL query used by both GraphQL strategies. The user's lists are requested
# 20 at a time and paged through the $after cursor.
# NOTE(review): items(first: 100) carries no cursor, so only the first 100
# entries of each list are requested — confirm whether longer lists need paging.
LISTS_QUERY = """
query($login: String!, $after: String) {
user(login: $login) {
lists(first: 20, after: $after) {
pageInfo { hasNextPage endCursor }
nodes {
name
slug
description
items(first: 100) {
nodes {
... on Repository {
nameWithOwner
description
url
stargazerCount
primaryLanguage { name }
}
}
}
}
}
}
}
"""
def fetch_via_pat():
    """Collect all star lists of USERNAME through the public GraphQL endpoint.

    Follows the ``lists`` connection page by page until ``hasNextPage`` is
    false.

    Returns:
        The list nodes from every page, in response order.

    Raises:
        RuntimeError: If a response contains an ``errors`` key.
    """
    collected = []
    cursor = None
    has_more = True
    while has_more:
        variables = {"login": USERNAME, "after": cursor}
        response = _post_json(
            GRAPHQL_PUBLIC, {"query": LISTS_QUERY, "variables": variables}
        )
        if "errors" in response:
            raise RuntimeError(response["errors"])
        connection = response["data"]["user"]["lists"]
        collected.extend(connection["nodes"])
        page_info = connection["pageInfo"]
        has_more = page_info["hasNextPage"]
        if has_more:
            cursor = page_info["endCursor"]
    return collected
| # --------------------------------------------------------------------------- | |
| # Strategy B: internal GitHub GraphQL (session cookies + CSRF) | |
| # --------------------------------------------------------------------------- | |
def _get_csrf():
    """Return a CSRF token scraped from the stars page, or None if none is found.

    The ``csrf-token`` meta tag is tried first, then an
    ``authenticity_token`` value.
    """
    page = _get(f"https://github.com/{USERNAME}?tab=stars")
    patterns = (
        r'<meta name="csrf-token" content="([^"]+)"',
        r'"authenticity_token"\s*value="([^"]+)"',
    )
    for pattern in patterns:
        found = re.search(pattern, page)
        if found:
            return found.group(1)
    return None
def fetch_via_cookies():
    """Collect all star lists of USERNAME through github.com's GraphQL endpoint.

    A CSRF token is scraped first and sent with AJAX-style headers on every
    request; the ``lists`` connection is then followed page by page.

    Returns:
        The list nodes from every page, in response order.

    Raises:
        RuntimeError: If no CSRF token can be found, or a response contains
            an ``errors`` key.
    """
    token = _get_csrf()
    if not token:
        raise RuntimeError("Could not find CSRF token — are the cookies still valid?")

    ajax_headers = {
        "X-Requested-With": "XMLHttpRequest",
        "X-CSRF-Token": token,
        "Referer": f"https://github.com/{USERNAME}?tab=stars",
    }

    collected, cursor = [], None
    while True:
        payload = {
            "query": LISTS_QUERY,
            "variables": {"login": USERNAME, "after": cursor},
        }
        response = _post_json(GRAPHQL_WEB, payload, extra=ajax_headers)
        if "errors" in response:
            raise RuntimeError(response["errors"])
        connection = response["data"]["user"]["lists"]
        collected.extend(connection["nodes"])
        page_info = connection["pageInfo"]
        if page_info["hasNextPage"]:
            cursor = page_info["endCursor"]
            continue
        return collected
| # --------------------------------------------------------------------------- | |
| # Strategy C: HTML scraping fallback | |
| # --------------------------------------------------------------------------- | |
def _parse_lists_from_html(html, username=None):
    """Extract the slugs of a user's star lists from stars-page HTML.

    GitHub renders list links like ``href="/stars/yucer/lists/my-list"``.

    Args:
        html: HTML text to scan.
        username: Login whose list links to look for; defaults to the
            module-level ``USERNAME``.

    Returns:
        The list slugs in order of first appearance, without duplicates.
        Only slugs are returned; display names are not extracted.
    """
    if username is None:
        username = USERNAME
    # The login is escaped so it is matched literally. Case is ignored because
    # the href may carry the login in a different letter case than the one
    # supplied. The slug stops at '/', '?' or '#' so query strings and
    # sub-paths do not leak into it.
    pattern = rf'href="/stars/{re.escape(username)}/lists/([^"/?#]+)'
    slugs = re.findall(pattern, html, re.IGNORECASE)
    # The same link can occur several times in one page; dict.fromkeys drops
    # repeats while keeping first-seen order.
    return list(dict.fromkeys(slugs))
def _repo_stub(path):
    """Build the minimal repo record used when only ``owner/name`` is known."""
    return {
        "nameWithOwner": path,
        "url": f"https://github.com/{path}",
        "description": "",
        "stargazerCount": 0,
        "primaryLanguage": None,
    }


def _parse_repos_from_html(html):
    """Extract repositories from the HTML of a star-list page.

    Two passes collect candidate link paths: the first link inside each
    ``<h3>`` element, then every anchor carrying
    ``itemprop="name codeRepository"``. Candidates are normalized, filtered
    to ``owner/name`` shaped paths and de-duplicated.

    Args:
        html: HTML text to scan.

    Returns:
        A list of dicts with the keys ``nameWithOwner``, ``url``,
        ``description``, ``stargazerCount`` and ``primaryLanguage``, in order
        of first appearance. Only the first two carry scraped data; the rest
        are empty placeholders.
    """
    candidates = []

    # Each <h3> is searched on its own so a heading without a link cannot make
    # the match run on into a later heading, and the href does not have to be
    # the anchor's first attribute.
    for heading in re.findall(r"<h3[^>]*>(.*?)</h3>", html, re.DOTALL):
        link = re.search(r'<a\b[^>]*\shref="/([^"]+)"', heading)
        if link:
            candidates.append(link.group(1))

    # Try richer extraction via data attributes.
    candidates.extend(
        match.group(1)
        for match in re.finditer(
            r'<a[^>]+href="/([^"]+)"[^>]*itemprop="name codeRepository"[^>]*>',
            html,
        )
    )

    seen = set()
    repos = []
    for raw in candidates:
        # Both passes are normalized the same way so "owner/repo/" and
        # "owner/repo" count as the same repository when de-duplicating.
        path = raw.strip("/")
        if path in seen:
            continue
        # Keep only two-segment paths; other links found in headings
        # (e.g. "login?return_to=...") are not repositories.
        if not re.fullmatch(r"[^/?#\s]+/[^/?#\s]+", path):
            continue
        seen.add(path)
        repos.append(_repo_stub(path))
    return repos
def _fetch_list_repos_html(slug):
    """Download the page of the list *slug* and return the repos parsed from it."""
    list_url = f"https://github.com/stars/{USERNAME}/lists/{slug}"
    return _parse_repos_from_html(_get(list_url))
def fetch_via_scraping():
    """Build star-list records by scraping the stars page and each list page.

    Returns:
        One dict per list slug found, shaped like a GraphQL list node
        (``name``, ``slug``, ``description``, ``items.nodes``). The slug
        doubles as the name and the description is empty.

    Raises:
        RuntimeError: If no list links are found on the stars page.
    """
    stars_page = _get(f"https://github.com/{USERNAME}?tab=stars")
    slugs = _parse_lists_from_html(stars_page)
    if not slugs:
        raise RuntimeError("Could not find any lists in HTML — page structure may have changed")

    def scrape(slug):
        # Announce the list before its page is downloaded.
        print(f" Scraping list: {slug} ...")
        return {
            "name": slug,
            "slug": slug,
            "description": "",
            "items": {"nodes": _fetch_list_repos_html(slug)},
        }

    return [scrape(slug) for slug in slugs]
| # --------------------------------------------------------------------------- | |
| # Markdown writer | |
| # --------------------------------------------------------------------------- | |
def slugify(name):
    """Turn *name* into a filename-safe slug.

    The name is lower-cased and stripped of surrounding whitespace; every
    character that is neither a word character nor a hyphen becomes ``_``.
    """
    normalized = name.lower().strip()
    unsafe = re.compile(r"[^\w\-]")
    return unsafe.sub("_", normalized)
def write_markdown(star_list):
    """Write one star list to ``<OUTPUT_DIR>/<slug>.md`` as a markdown bullet list.

    Args:
        star_list: Dict with a ``name``, an optional ``description`` and the
            repositories under ``items.nodes``. Each repository is a dict
            that may carry ``url``, ``nameWithOwner``, ``description``,
            ``stargazerCount`` and ``primaryLanguage``.

    Side effects:
        Creates ``OUTPUT_DIR`` if needed, (over)writes the markdown file and
        prints a one-line summary.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    name = star_list["name"]
    desc = star_list.get("description") or ""
    # Skip nodes that carry no data (None or an empty dict, e.g. an item the
    # query's "... on Repository" fragment did not match); they would
    # otherwise crash on .get() or produce a "- []()" line.
    repos = [repo for repo in star_list["items"]["nodes"] if repo]
    path = os.path.join(OUTPUT_DIR, f"{slugify(name)}.md")
    # The output contains non-ASCII characters ("—", "★"); pinning UTF-8 keeps
    # the write from failing under a locale whose default encoding cannot
    # represent them.
    with open(path, "w", encoding="utf-8") as f:
        f.write(f"# {name}\n\n")
        if desc:
            f.write(f"{desc}\n\n")
        for repo in repos:
            url = repo.get("url", "")
            rname = repo.get("nameWithOwner", url)
            rdesc = repo.get("description") or ""
            stars = repo.get("stargazerCount", 0)
            lang = (repo.get("primaryLanguage") or {}).get("name", "")
            f.write(f"- [{rname}]({url})")
            if rdesc:
                f.write(f" — {rdesc}")
            # Language and star count are shown only when present/non-zero.
            meta = [x for x in [lang, f"★ {stars}" if stars else ""] if x]
            if meta:
                f.write(f" `{' | '.join(meta)}`")
            f.write("\n")
    print(f" -> {path} ({len(repos)} repos)")
| # --------------------------------------------------------------------------- | |
| # Main | |
| # --------------------------------------------------------------------------- | |
def main():
    """Fetch the star lists of USERNAME and write one markdown file per list.

    Strategies are tried in order: the public GraphQL API when a token is
    set; otherwise (or if that failed) the internal GraphQL endpoint when
    cookies are set, with HTML scraping as its fallback.

    Exits with status 1 when no credentials are provided or when every
    attempted strategy fails. Returns normally, after printing a notice, when
    the fetch succeeded but produced no lists.
    """
    if not COOKIE_STRING and not GITHUB_TOKEN:
        print("ERROR: Provide authentication via one of:")
        print(" GITHUB_COOKIES='...' (copy cookie string from browser DevTools)")
        print(" GITHUB_TOKEN='ghp_...' (personal access token)")
        sys.exit(1)
    print(f"Fetching star lists for @{USERNAME} ...")
    # None means "no strategy has succeeded yet"; an empty list is a
    # successful fetch that found nothing.
    star_lists = None
    if GITHUB_TOKEN:
        print(" Using PAT via public GraphQL API ...")
        try:
            star_lists = fetch_via_pat()
        except Exception as e:
            print(f" PAT method failed: {e}")
    if star_lists is None and COOKIE_STRING:
        print(" Using session cookies via internal GraphQL API ...")
        try:
            star_lists = fetch_via_cookies()
        except Exception as e:
            print(f" Internal GraphQL failed: {e}")
            print(" Falling back to HTML scraping ...")
            try:
                star_lists = fetch_via_scraping()
            except Exception as e2:
                print(f" HTML scraping also failed: {e2}")
                sys.exit(1)
    if star_lists is None:
        # Reached when the PAT strategy failed and no cookies were available
        # for the other strategies: report a failure rather than claiming
        # the user has no lists.
        print("ERROR: Could not fetch star lists with the provided credentials.")
        sys.exit(1)
    if not star_lists:
        print("No star lists found.")
        return
    print(f"\nFound {len(star_lists)} list(s). Writing to '{OUTPUT_DIR}/':\n")
    for sl in star_lists:
        write_markdown(sl)
    print("\nDone.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment