Skip to content

Instantly share code, notes, and snippets.

@yucer
Created May 18, 2026 21:34
Show Gist options
  • Select an option

  • Save yucer/ff93237bf6d167847ce6ecfca5156eaf to your computer and use it in GitHub Desktop.

Select an option

Save yucer/ff93237bf6d167847ce6ecfca5156eaf to your computer and use it in GitHub Desktop.
Export github star lists
#!/usr/bin/env python3
"""
Export GitHub star lists to markdown files using a personal access token or browser session cookies.
Usage:
python3 export_github_lists.py [username] [output_dir]
Authentication (pick one):
GITHUB_COOKIES="<full cookie string>" python3 export_github_lists.py
GITHUB_TOKEN="ghp_..." python3 export_github_lists.py
"""
import sys
import os
import json
import re
import urllib.request
import urllib.error
# Optional positional command-line arguments: GitHub login, then output directory.
USERNAME = sys.argv[1] if len(sys.argv) > 1 else "yucer"
OUTPUT_DIR = sys.argv[2] if len(sys.argv) > 2 else "./github_lists"
# Credentials come from the environment; an empty string means "not provided".
COOKIE_STRING = os.environ.get("GITHUB_COOKIES", "")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
# GraphQL endpoints: GRAPHQL_PUBLIC is used by fetch_via_pat(),
# GRAPHQL_WEB by fetch_via_cookies().
GRAPHQL_PUBLIC = "https://api.github.com/graphql"
GRAPHQL_WEB = "https://github.com/graphql"
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
# Browser-like User-Agent header value sent by both _get() and _post_json().
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36"
def _get(url, extra=None, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Absolute URL to request.
        extra: Optional mapping of additional request headers. It is applied
            last, so it overrides the default headers on key collisions.
        timeout: Socket timeout in seconds handed to ``urlopen`` so that a
            stalled connection cannot block the script forever.

    Returns:
        The response body decoded as UTF-8; undecodable bytes are replaced.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` is a
            subclass raised for HTTP error statuses).
    """
    headers = {"User-Agent": UA, "Accept": "text/html,application/xhtml+xml,*/*"}
    if COOKIE_STRING:
        headers["Cookie"] = COOKIE_STRING
    if extra:
        headers.update(extra)
    req = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return r.read().decode("utf-8", errors="replace")
def _post_json(url, payload, extra=None, timeout=30):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    The ``Cookie`` header is sent when COOKIE_STRING is set and the
    ``Authorization: Bearer`` header when GITHUB_TOKEN is set.

    Args:
        url: Absolute URL to post to.
        payload: JSON-serialisable object used as the request body.
        extra: Optional mapping of additional request headers. It is applied
            last, so it overrides the default headers on key collisions.
        timeout: Socket timeout in seconds handed to ``urlopen`` so that a
            stalled connection cannot block the script forever.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` is a
            subclass raised for HTTP error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    data = json.dumps(payload).encode()
    headers = {
        "User-Agent": UA,
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    if COOKIE_STRING:
        headers["Cookie"] = COOKIE_STRING
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    if extra:
        headers.update(extra)
    # Supplying ``data`` is what makes urllib issue a POST instead of a GET.
    req = urllib.request.Request(url, data=data, headers=headers)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return json.loads(r.read())
# ---------------------------------------------------------------------------
# Strategy A: public GraphQL API with PAT
# ---------------------------------------------------------------------------
# GraphQL query sent by both fetch_via_pat() and fetch_via_cookies().
# Variables: $login (the user's login) and $after (cursor for the next page
# of lists; null on the first request). Lists are requested 20 per page.
# NOTE(review): `items(first: 100)` is requested without a cursor, so a list
# holding more than 100 entries would presumably come back truncated — confirm.
LISTS_QUERY = """
query($login: String!, $after: String) {
user(login: $login) {
lists(first: 20, after: $after) {
pageInfo { hasNextPage endCursor }
nodes {
name
slug
description
items(first: 100) {
nodes {
... on Repository {
nameWithOwner
description
url
stargazerCount
primaryLanguage { name }
}
}
}
}
}
}
}
"""
def fetch_via_pat():
    """Fetch all star lists of USERNAME from the public GraphQL endpoint.

    Posts LISTS_QUERY to GRAPHQL_PUBLIC repeatedly, following
    ``pageInfo.endCursor`` until ``hasNextPage`` is false.

    Returns:
        A list of the ``lists.nodes`` dicts accumulated over all pages.

    Raises:
        RuntimeError: If the response contains an ``errors`` key, or if it
            carries no ``data.user.lists`` object.
    """
    lists, after = [], None
    while True:
        data = _post_json(
            GRAPHQL_PUBLIC,
            {"query": LISTS_QUERY, "variables": {"login": USERNAME, "after": after}},
        )
        if "errors" in data:
            raise RuntimeError(data["errors"])
        # Guard against a null data/user/lists object so the caller sees a
        # clear message instead of "'NoneType' object is not subscriptable".
        user = (data.get("data") or {}).get("user")
        page = user.get("lists") if user else None
        if not page:
            raise RuntimeError(f"No star-list data returned for user {USERNAME!r}")
        lists.extend(page["nodes"])
        if not page["pageInfo"]["hasNextPage"]:
            break
        after = page["pageInfo"]["endCursor"]
    return lists
# ---------------------------------------------------------------------------
# Strategy B: internal GitHub GraphQL (session cookies + CSRF)
# ---------------------------------------------------------------------------
def _get_csrf():
    """Return a CSRF token scraped from USERNAME's stars page, or None.

    The ``csrf-token`` meta tag is tried first; an ``authenticity_token``
    value attribute is the fallback.
    """
    page = _get(f"https://github.com/{USERNAME}?tab=stars")
    patterns = (
        r'<meta name="csrf-token" content="([^"]+)"',
        r'"authenticity_token"\s*value="([^"]+)"',
    )
    for pattern in patterns:
        found = re.search(pattern, page)
        if found:
            return found.group(1)
    return None
def fetch_via_cookies():
    """Fetch all star lists of USERNAME through github.com's GraphQL endpoint.

    Obtains a CSRF token via _get_csrf(), then posts LISTS_QUERY to
    GRAPHQL_WEB with XHR-style headers, following ``pageInfo.endCursor``
    until ``hasNextPage`` is false.

    Returns:
        A list of the ``lists.nodes`` dicts accumulated over all pages.

    Raises:
        RuntimeError: If no CSRF token can be found, if the response contains
            an ``errors`` key, or if it carries no ``data.user.lists`` object.
    """
    csrf = _get_csrf()
    if not csrf:
        raise RuntimeError("Could not find CSRF token — are the cookies still valid?")
    extra = {
        "X-Requested-With": "XMLHttpRequest",
        "X-CSRF-Token": csrf,
        "Referer": f"https://github.com/{USERNAME}?tab=stars",
    }
    lists, after = [], None
    while True:
        data = _post_json(
            GRAPHQL_WEB,
            {"query": LISTS_QUERY, "variables": {"login": USERNAME, "after": after}},
            extra=extra,
        )
        if "errors" in data:
            raise RuntimeError(data["errors"])
        # Guard against a null data/user/lists object so the caller sees a
        # clear message instead of "'NoneType' object is not subscriptable".
        user = (data.get("data") or {}).get("user")
        page = user.get("lists") if user else None
        if not page:
            raise RuntimeError(f"No star-list data returned for user {USERNAME!r}")
        lists.extend(page["nodes"])
        if not page["pageInfo"]["hasNextPage"]:
            break
        after = page["pageInfo"]["endCursor"]
    return lists
# ---------------------------------------------------------------------------
# Strategy C: HTML scraping fallback
# ---------------------------------------------------------------------------
def _parse_lists_from_html(html):
    """Extract star-list slugs from the HTML of the stars page.

    Matches links of the form ``href="/stars/<USERNAME>/lists/<slug>"``.

    Args:
        html: HTML text of the stars page.

    Returns:
        The slugs in first-seen order with duplicates removed, so a list
        linked more than once on the page is only scraped once.
    """
    # Escape the login so it is always matched literally, and ignore case in
    # case the page renders the login with different casing than was typed.
    pattern = rf'href="/stars/{re.escape(USERNAME)}/lists/([^"]+)"'
    slugs = re.findall(pattern, html, re.IGNORECASE)
    # dict.fromkeys keeps insertion order while dropping repeats.
    return list(dict.fromkeys(slugs))
def _parse_repos_from_html(html):
repos = []
# Each starred repo block contains an <h3> with the repo link
blocks = re.findall(
r'<h3[^>]*>.*?<a href="/([^"]+)"[^>]*>.*?</h3>',
html, re.DOTALL
)
for path in blocks:
repos.append({
"nameWithOwner": path,
"url": f"https://github.com/{path}",
"description": "",
"stargazerCount": 0,
"primaryLanguage": None,
})
# Try richer extraction via data attributes
for m in re.finditer(
r'<a[^>]+href="/([^"]+)"[^>]*itemprop="name codeRepository"[^>]*>',
html
):
path = m.group(1).strip("/")
repos.append({
"nameWithOwner": path,
"url": f"https://github.com/{path}",
"description": "",
"stargazerCount": 0,
"primaryLanguage": None,
})
# Deduplicate
seen, unique = set(), []
for r in repos:
if r["nameWithOwner"] not in seen:
seen.add(r["nameWithOwner"])
unique.append(r)
return unique
def _fetch_list_repos_html(slug):
    """Download the page of list *slug* and return the repos parsed from it."""
    list_url = f"https://github.com/stars/{USERNAME}/lists/{slug}"
    return _parse_repos_from_html(_get(list_url))
def fetch_via_scraping():
    """Collect star lists by scraping HTML instead of calling GraphQL.

    Reads the list slugs from USERNAME's stars page, then scrapes each list
    page in turn. The slug doubles as the list name and the description is
    left empty.

    Returns:
        A list of dicts shaped like the GraphQL list nodes
        (``name``, ``slug``, ``description``, ``items.nodes``).

    Raises:
        RuntimeError: If no list links are found on the stars page.
    """
    stars_page = _get(f"https://github.com/{USERNAME}?tab=stars")
    slugs = _parse_lists_from_html(stars_page)
    if not slugs:
        raise RuntimeError("Could not find any lists in HTML — page structure may have changed")

    def _scrape(slug):
        # Announce first, then fetch, so progress shows before the request.
        print(f" Scraping list: {slug} ...")
        return {
            "name": slug,
            "slug": slug,
            "description": "",
            "items": {"nodes": _fetch_list_repos_html(slug)},
        }

    return [_scrape(slug) for slug in slugs]
# ---------------------------------------------------------------------------
# Markdown writer
# ---------------------------------------------------------------------------
def slugify(name):
    """Turn a list name into a string usable as a file name stem.

    The name is lower-cased and stripped, and every character that is not a
    word character or a hyphen is replaced with an underscore.

    Args:
        name: The list name.

    Returns:
        The slug, or ``"untitled"`` when the name is empty or only whitespace,
        so the caller never ends up with a bare ``.md`` file name.
    """
    slug = re.sub(r"[^\w\-]", "_", name.lower().strip())
    return slug or "untitled"
def write_markdown(star_list):
    """Write one star list to ``<OUTPUT_DIR>/<slugified name>.md``.

    The file holds a ``# <name>`` heading, the list description if any, and
    one bullet per repository with its link, optional description, and an
    optional language / star-count tag.

    Args:
        star_list: Dict with keys ``name``, optional ``description`` and
            ``items`` (a dict whose ``nodes`` is a list of repo dicts).

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    name = star_list["name"]
    desc = star_list.get("description") or ""
    # Skip null/empty nodes rather than crashing on them or emitting "- []()".
    repos = [repo for repo in star_list["items"]["nodes"] if repo]
    path = os.path.join(OUTPUT_DIR, f"{slugify(name)}.md")

    lines = [f"# {name}\n\n"]
    if desc:
        lines.append(f"{desc}\n\n")
    for repo in repos:
        url = repo.get("url", "")
        rname = repo.get("nameWithOwner", url)
        rdesc = repo.get("description") or ""
        stars = repo.get("stargazerCount", 0)
        lang = (repo.get("primaryLanguage") or {}).get("name", "")
        entry = f"- [{rname}]({url})"
        if rdesc:
            entry += f" — {rdesc}"
        meta = [x for x in (lang, f"★ {stars}" if stars else "") if x]
        if meta:
            entry += f" `{' | '.join(meta)}`"
        lines.append(entry + "\n")

    # Explicit UTF-8: the output contains "★" and "—", which the platform
    # default encoding cannot always represent.
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)
    print(f" -> {path} ({len(repos)} repos)")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Fetch the star lists of USERNAME and write one markdown file per list.

    Strategies are tried in order: the public GraphQL API when GITHUB_TOKEN is
    set; otherwise (or if that fails) github.com's GraphQL endpoint when
    GITHUB_COOKIES is set, with HTML scraping as its fallback.

    Exits with status 1 when no credentials are provided or when every
    available strategy fails.
    """
    if not COOKIE_STRING and not GITHUB_TOKEN:
        print("ERROR: Provide authentication via one of:")
        print(" GITHUB_COOKIES='...' (copy cookie string from browser DevTools)")
        print(" GITHUB_TOKEN='ghp_...' (personal access token)")
        sys.exit(1)
    print(f"Fetching star lists for @{USERNAME} ...")
    star_lists = None
    # Broad ``except Exception`` is deliberate below: this is the top-level
    # boundary and any failure should simply move on to the next strategy.
    if GITHUB_TOKEN:
        print(" Using PAT via public GraphQL API ...")
        try:
            star_lists = fetch_via_pat()
        except Exception as e:
            print(f" PAT method failed: {e}")
    if star_lists is None and COOKIE_STRING:
        print(" Using session cookies via internal GraphQL API ...")
        try:
            star_lists = fetch_via_cookies()
        except Exception as e:
            print(f" Internal GraphQL failed: {e}")
            print(" Falling back to HTML scraping ...")
            try:
                star_lists = fetch_via_scraping()
            except Exception as e2:
                print(f" HTML scraping also failed: {e2}")
                sys.exit(1)
    if star_lists is None:
        # Reached when the PAT attempt failed and no cookies are available.
        # That is a failure, not an empty result, so do not exit with 0.
        print("ERROR: all available fetch methods failed.")
        sys.exit(1)
    if not star_lists:
        print("No star lists found.")
        return
    print(f"\nFound {len(star_lists)} list(s). Writing to '{OUTPUT_DIR}/':\n")
    for sl in star_lists:
        write_markdown(sl)
    print("\nDone.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment