yucer · May 18, 2026 21:34
diff --git a/export_github_lists.py b/export_github_lists.py
 #!/usr/bin/env python3
 """
 Export GitHub star lists to markdown files using browser session cookies or a personal access token.

 Usage:
    python3 export_github_lists.py [username] [output_dir]

 Authentication (pick one):
    GITHUB_COOKIES="<full cookie string>"  python3 export_github_lists.py
    GITHUB_TOKEN="ghp_..."                 python3 export_github_lists.py
 """

 import sys
 import os
 import json
 import re
 import urllib.request
 import urllib.error

 # Positional CLI arguments: [username] [output_dir]; each has a default.
 USERNAME = (sys.argv[1:2] or ["yucer"])[0]
 OUTPUT_DIR = (sys.argv[2:3] or ["./github_lists"])[0]

 # Credentials are read from the environment; an unset variable yields "".
 COOKIE_STRING = os.getenv("GITHUB_COOKIES", "")
 GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")

 # GraphQL endpoints used by the token-based and cookie-based strategies.
 GRAPHQL_PUBLIC = "https://api.github.com/graphql"
 GRAPHQL_WEB = "https://github.com/graphql"

 # ---------------------------------------------------------------------------
 # HTTP helpers
 # ---------------------------------------------------------------------------

 # Browser-like User-Agent sent with every request.
 UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36"
 )


 def _get(url, extra=None):
    """Fetch *url* with a GET request and return the response body as text.

    A browser-like ``User-Agent`` and ``Accept`` header are always sent.  The
    ``Cookie`` header is added when ``COOKIE_STRING`` is non-empty, and
    *extra* (optional headers) is applied last so it can override defaults.
    The body is decoded as UTF-8 with undecodable bytes replaced.
    """
    request_headers = dict(
        [("User-Agent", UA), ("Accept", "text/html,application/xhtml+xml,*/*")]
    )
    request_headers.update({"Cookie": COOKIE_STRING} if COOKIE_STRING else {})
    request_headers.update(extra or {})

    request = urllib.request.Request(url, headers=request_headers)
    with urllib.request.urlopen(request) as response:
        raw_body = response.read()
    return raw_body.decode("utf-8", errors="replace")


 def _post_json(url, payload, extra=None):
    """POST *payload* as JSON to *url* and return the parsed JSON response.

    The ``Cookie`` header is sent when ``COOKIE_STRING`` is non-empty and a
    ``Bearer`` ``Authorization`` header when ``GITHUB_TOKEN`` is non-empty.
    *extra* (optional headers) is applied last so it can override defaults.
    """
    request_headers = dict(
        [
            ("User-Agent", UA),
            ("Content-Type", "application/json"),
            ("Accept", "application/json"),
        ]
    )
    if COOKIE_STRING:
        request_headers["Cookie"] = COOKIE_STRING
    if GITHUB_TOKEN:
        request_headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    request_headers.update(extra or {})

    body = json.dumps(payload).encode()
    request = urllib.request.Request(url, data=body, headers=request_headers)
    with urllib.request.urlopen(request) as response:
        return json.load(response)


 # ---------------------------------------------------------------------------
 # Strategy A: public GraphQL API with PAT
 # ---------------------------------------------------------------------------

 # GraphQL query shared by the PAT and cookie strategies.  Lists are paged
 # 20 at a time through the $after cursor.  Each list asks for at most 100
 # items and requests no pageInfo for them, so items beyond the first 100 of
 # a list are never fetched by this query.
 LISTS_QUERY = """
 query($login: String!, $after: String) {
  user(login: $login) {
    lists(first: 20, after: $after) {
      pageInfo { hasNextPage endCursor }
      nodes {
        name
        slug
        description
        items(first: 100) {
          nodes {
            ... on Repository {
              nameWithOwner
              description
              url
              stargazerCount
              primaryLanguage { name }
            }
          }
        }
      }
    }
  }
 }
 """


 def fetch_via_pat():
    """Fetch every star list of ``USERNAME`` from ``GRAPHQL_PUBLIC``.

    Runs ``LISTS_QUERY`` repeatedly, following the ``endCursor`` of each page
    until ``hasNextPage`` is false, and returns the accumulated list nodes.

    Raises:
        RuntimeError: if a response contains an ``errors`` entry.
    """
    collected = []
    cursor = None
    has_more = True
    while has_more:
        variables = {"login": USERNAME, "after": cursor}
        response = _post_json(
            GRAPHQL_PUBLIC, {"query": LISTS_QUERY, "variables": variables}
        )
        if "errors" in response:
            raise RuntimeError(response["errors"])

        page = response["data"]["user"]["lists"]
        collected.extend(page["nodes"])

        page_info = page["pageInfo"]
        if page_info["hasNextPage"]:
            cursor = page_info["endCursor"]
        else:
            has_more = False
    return collected


 # ---------------------------------------------------------------------------
 # Strategy B: internal GitHub GraphQL (session cookies + CSRF)
 # ---------------------------------------------------------------------------

 def _get_csrf():
    """Return a CSRF token scraped from the user's stars page, or ``None``.

    The ``csrf-token`` meta tag is tried first; an ``authenticity_token``
    value is the fallback.
    """
    page = _get(f"https://github.com/{USERNAME}?tab=stars")
    token_patterns = (
        r'<meta name="csrf-token" content="([^"]+)"',
        r'"authenticity_token"\s*value="([^"]+)"',
    )
    for pattern in token_patterns:
        found = re.search(pattern, page)
        if found:
            return found.group(1)
    return None


 def fetch_via_cookies():
    """Fetch every star list of ``USERNAME`` from ``GRAPHQL_WEB``.

    A CSRF token is scraped first and sent with each request, together with
    ``X-Requested-With`` and ``Referer`` headers.  Pagination follows the
    ``endCursor`` of each page until ``hasNextPage`` is false.

    Raises:
        RuntimeError: if no CSRF token is found, or a response contains an
            ``errors`` entry.
    """
    token = _get_csrf()
    if not token:
        raise RuntimeError("Could not find CSRF token — are the cookies still valid?")

    ajax_headers = {
        "X-Requested-With": "XMLHttpRequest",
        "X-CSRF-Token": token,
        "Referer": f"https://github.com/{USERNAME}?tab=stars",
    }

    collected = []
    cursor = None
    has_more = True
    while has_more:
        variables = {"login": USERNAME, "after": cursor}
        response = _post_json(
            GRAPHQL_WEB,
            {"query": LISTS_QUERY, "variables": variables},
            extra=ajax_headers,
        )
        if "errors" in response:
            raise RuntimeError(response["errors"])

        page = response["data"]["user"]["lists"]
        collected.extend(page["nodes"])

        page_info = page["pageInfo"]
        if page_info["hasNextPage"]:
            cursor = page_info["endCursor"]
        else:
            has_more = False
    return collected


 # ---------------------------------------------------------------------------
 # Strategy C: HTML scraping fallback
 # ---------------------------------------------------------------------------

 def _parse_lists_from_html(html):
    """Return the list slugs linked from the stars page, in page order.

    Only slugs are extracted (the text after ``/stars/<user>/lists/`` in each
    matching ``href``); display names are not parsed.  A slug linked more
    than once is returned once, so each list is later scraped and written a
    single time.
    """
    # GitHub renders list links like: href="/stars/yucer/lists/my-list"
    # The username comes from the command line, so escape it before embedding
    # it in the pattern.
    pattern = rf'href="/stars/{re.escape(USERNAME)}/lists/([^"]+)"'
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(re.findall(pattern, html)))


 def _parse_repos_from_html(html):
    repos = []
    # Each starred repo block contains an <h3> with the repo link
    blocks = re.findall(
        r'<h3[^>]*>.*?<a href="/([^"]+)"[^>]*>.*?</h3>',
        html, re.DOTALL
    )
    for path in blocks:
        repos.append({
            "nameWithOwner": path,
            "url": f"https://github.com/{path}",
            "description": "",
            "stargazerCount": 0,
            "primaryLanguage": None,
        })
    # Try richer extraction via data attributes
    for m in re.finditer(
        r'<a[^>]+href="/([^"]+)"[^>]*itemprop="name codeRepository"[^>]*>',
        html
    ):
        path = m.group(1).strip("/")
        repos.append({
            "nameWithOwner": path,
            "url": f"https://github.com/{path}",
            "description": "",
            "stargazerCount": 0,
            "primaryLanguage": None,
        })
    # Deduplicate
    seen, unique = set(), []
    for r in repos:
        if r["nameWithOwner"] not in seen:
            seen.add(r["nameWithOwner"])
            unique.append(r)
    return unique


 def _fetch_list_repos_html(slug):
    """Download the page of the list *slug* and return its parsed repos.

    A single URL is requested; the result is whatever
    ``_parse_repos_from_html`` extracts from that one response.
    """
    list_url = f"https://github.com/stars/{USERNAME}/lists/{slug}"
    page = _get(list_url)
    return _parse_repos_from_html(page)


 def fetch_via_scraping():
    """Build the star lists by scraping HTML instead of using GraphQL.

    The stars page supplies the list slugs; each list page is then fetched
    and parsed.  The result has the same shape as the GraphQL list nodes,
    with the slug standing in for the name and an empty description.

    Raises:
        RuntimeError: if no list links are found on the stars page.
    """
    stars_page = _get(f"https://github.com/{USERNAME}?tab=stars")
    slugs = _parse_lists_from_html(stars_page)
    if not slugs:
        raise RuntimeError("Could not find any lists in HTML — page structure may have changed")

    def scrape(slug):
        # Announce the list before fetching it so progress is visible.
        print(f"  Scraping list: {slug} ...")
        return {
            "name": slug,
            "slug": slug,
            "description": "",
            "items": {"nodes": _fetch_list_repos_html(slug)},
        }

    return [scrape(slug) for slug in slugs]


 # ---------------------------------------------------------------------------
 # Markdown writer
 # ---------------------------------------------------------------------------

 def slugify(name):
    """Turn *name* into a file-name-friendly slug.

    The name is lower-cased and stripped of surrounding whitespace, then
    every character that is neither a word character nor ``-`` is replaced
    with ``_``.
    """
    normalized = name.lower().strip()
    unsafe_char = re.compile(r"[^\w\-]")
    return unsafe_char.sub("_", normalized)


 def write_markdown(star_list):
    """Write one star list to ``<OUTPUT_DIR>/<slugified name>.md``.

    *star_list* is a mapping with ``name``, optional ``description`` and
    ``items.nodes`` (a list of repository mappings).  The file gets a title,
    the description if any, and one bullet per repository with optional
    description, language and star count.

    The file is always written as UTF-8 because the output contains
    non-ASCII characters ("—", "★") that a locale-dependent default encoding
    may not be able to encode.  Null or empty item nodes are skipped rather
    than crashing or producing an empty ``- []()`` bullet.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    name = star_list["name"]
    desc = star_list.get("description") or ""
    repos = [repo for repo in star_list["items"]["nodes"] if repo]

    lines = [f"# {name}\n\n"]
    if desc:
        lines.append(f"{desc}\n\n")
    for repo in repos:
        url = repo.get("url", "")
        rname = repo.get("nameWithOwner", url)
        rdesc = repo.get("description") or ""
        stars = repo.get("stargazerCount", 0)
        lang = (repo.get("primaryLanguage") or {}).get("name", "")

        entry = f"- [{rname}]({url})"
        if rdesc:
            entry += f" — {rdesc}"
        # Only show the metadata that is actually known.
        meta = [x for x in [lang, f"★ {stars}" if stars else ""] if x]
        if meta:
            entry += " `" + "  |  ".join(meta) + "`"
        lines.append(entry + "\n")

    path = os.path.join(OUTPUT_DIR, f"{slugify(name)}.md")
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    print(f"  -> {path}  ({len(repos)} repos)")


 # ---------------------------------------------------------------------------
 # Main
 # ---------------------------------------------------------------------------

 def main():
    """Fetch the star lists of ``USERNAME`` and write one markdown file each.

    Strategies are tried in order: the public GraphQL API when
    ``GITHUB_TOKEN`` is set, then the web GraphQL endpoint when
    ``GITHUB_COOKIES`` is set, then HTML scraping if that cookie-based call
    fails.  The process exits with status 1 when no credentials are given or
    when every available strategy fails.  An account that simply has no
    lists is reported and is not an error.
    """
    if not COOKIE_STRING and not GITHUB_TOKEN:
        print("ERROR: Provide authentication via one of:")
        print("  GITHUB_COOKIES='...'  (copy cookie string from browser DevTools)")
        print("  GITHUB_TOKEN='ghp_...'  (personal access token)")
        sys.exit(1)

    print(f"Fetching star lists for @{USERNAME} ...")

    # None means "nothing fetched yet"; an empty list means "fetched, no lists".
    star_lists = None

    if GITHUB_TOKEN:
        print("  Using PAT via public GraphQL API ...")
        try:
            star_lists = fetch_via_pat()
        except Exception as e:
            print(f"  PAT method failed: {e}")

    if star_lists is None and COOKIE_STRING:
        print("  Using session cookies via internal GraphQL API ...")
        try:
            star_lists = fetch_via_cookies()
        except Exception as e:
            print(f"  Internal GraphQL failed: {e}")
            print("  Falling back to HTML scraping ...")
            try:
                star_lists = fetch_via_scraping()
            except Exception as e2:
                print(f"  HTML scraping also failed: {e2}")
                sys.exit(1)

    if star_lists is None:
        # Reached when the PAT method failed and no cookies were supplied.
        # Report the failure instead of claiming the account has no lists.
        print("ERROR: Could not fetch star lists with the provided credentials.")
        sys.exit(1)

    if not star_lists:
        print("No star lists found.")
        return

    print(f"\nFound {len(star_lists)} list(s). Writing to '{OUTPUT_DIR}/':\n")
    for sl in star_lists:
        write_markdown(sl)

    print("\nDone.")


 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Export GitHub star lists to markdown files using browser session cookies or a personal access token.

	Usage:
	python3 export_github_lists.py [username] [output_dir]

	Authentication (pick one):
	GITHUB_COOKIES="<full cookie string>" python3 export_github_lists.py
	GITHUB_TOKEN="ghp_..." python3 export_github_lists.py
	"""

	import sys
	import os
	import json
	import re
	import urllib.request
	import urllib.error

	# Positional CLI arguments: [username] [output_dir]; each has a default.
	USERNAME = (sys.argv[1:2] or ["yucer"])[0]
	OUTPUT_DIR = (sys.argv[2:3] or ["./github_lists"])[0]

	# Credentials are read from the environment; an unset variable yields "".
	COOKIE_STRING = os.getenv("GITHUB_COOKIES", "")
	GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")

	# GraphQL endpoints used by the token-based and cookie-based strategies.
	GRAPHQL_PUBLIC = "https://api.github.com/graphql"
	GRAPHQL_WEB = "https://github.com/graphql"

	# ---------------------------------------------------------------------------
	# HTTP helpers
	# ---------------------------------------------------------------------------

	# Browser-like User-Agent sent with every request.
	UA = (
	    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
	    "(KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36"
	)


	def _get(url, extra=None):
	    """Fetch *url* with a GET request and return the response body as text.

	    A browser-like ``User-Agent`` and ``Accept`` header are always sent.
	    The ``Cookie`` header is added when ``COOKIE_STRING`` is non-empty, and
	    *extra* (optional headers) is applied last so it can override defaults.
	    The body is decoded as UTF-8 with undecodable bytes replaced.
	    """
	    # "*/*" is the catch-all media range; a bare "/" is not a valid one.
	    headers = {"User-Agent": UA, "Accept": "text/html,application/xhtml+xml,*/*"}
	    if COOKIE_STRING:
	        headers["Cookie"] = COOKIE_STRING
	    if extra:
	        headers.update(extra)
	    req = urllib.request.Request(url, headers=headers)
	    with urllib.request.urlopen(req) as r:
	        return r.read().decode("utf-8", errors="replace")


	def _post_json(url, payload, extra=None):
	    """POST *payload* as JSON to *url* and return the parsed JSON response.

	    The ``Cookie`` header is sent when ``COOKIE_STRING`` is non-empty and a
	    ``Bearer`` ``Authorization`` header when ``GITHUB_TOKEN`` is non-empty.
	    *extra* (optional headers) is applied last so it can override defaults.
	    """
	    data = json.dumps(payload).encode()
	    headers = {
	        "User-Agent": UA,
	        "Content-Type": "application/json",
	        "Accept": "application/json",
	    }
	    if COOKIE_STRING:
	        headers["Cookie"] = COOKIE_STRING
	    if GITHUB_TOKEN:
	        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
	    if extra:
	        headers.update(extra)
	    req = urllib.request.Request(url, data=data, headers=headers)
	    with urllib.request.urlopen(req) as r:
	        return json.loads(r.read())


	# ---------------------------------------------------------------------------
	# Strategy A: public GraphQL API with PAT
	# ---------------------------------------------------------------------------

	# GraphQL query shared by the PAT and cookie strategies.  Lists are paged
	# 20 at a time through the $after cursor.  Each list asks for at most 100
	# items and requests no pageInfo for them, so items beyond the first 100 of
	# a list are never fetched by this query.
	LISTS_QUERY = """
	query($login: String!, $after: String) {
	user(login: $login) {
	lists(first: 20, after: $after) {
	pageInfo { hasNextPage endCursor }
	nodes {
	name
	slug
	description
	items(first: 100) {
	nodes {
	... on Repository {
	nameWithOwner
	description
	url
	stargazerCount
	primaryLanguage { name }
	}
	}
	}
	}
	}
	}
	}
	"""


	def fetch_via_pat():
	    """Fetch every star list of ``USERNAME`` from ``GRAPHQL_PUBLIC``.

	    Runs ``LISTS_QUERY`` repeatedly, following the ``endCursor`` of each
	    page until ``hasNextPage`` is false, and returns the accumulated list
	    nodes.

	    Raises:
	        RuntimeError: if a response contains an ``errors`` entry.
	    """
	    lists, after = [], None
	    while True:
	        data = _post_json(
	            GRAPHQL_PUBLIC,
	            {"query": LISTS_QUERY, "variables": {"login": USERNAME, "after": after}},
	        )
	        if "errors" in data:
	            raise RuntimeError(data["errors"])
	        nodes = data["data"]["user"]["lists"]
	        lists.extend(nodes["nodes"])
	        if not nodes["pageInfo"]["hasNextPage"]:
	            break
	        after = nodes["pageInfo"]["endCursor"]
	    return lists


	# ---------------------------------------------------------------------------
	# Strategy B: internal GitHub GraphQL (session cookies + CSRF)
	# ---------------------------------------------------------------------------

	def _get_csrf():
	    """Return a CSRF token scraped from the user's stars page, or ``None``.

	    The ``csrf-token`` meta tag is tried first; an ``authenticity_token``
	    value is the fallback.
	    """
	    html = _get(f"https://github.com/{USERNAME}?tab=stars")
	    m = re.search(r'<meta name="csrf-token" content="([^"]+)"', html)
	    if not m:
	        m = re.search(r'"authenticity_token"\s*value="([^"]+)"', html)
	    return m.group(1) if m else None


	def fetch_via_cookies():
	    """Fetch every star list of ``USERNAME`` from ``GRAPHQL_WEB``.

	    A CSRF token is scraped first and sent with each request, together
	    with ``X-Requested-With`` and ``Referer`` headers.  Pagination follows
	    the ``endCursor`` of each page until ``hasNextPage`` is false.

	    Raises:
	        RuntimeError: if no CSRF token is found, or a response contains an
	            ``errors`` entry.
	    """
	    csrf = _get_csrf()
	    if not csrf:
	        raise RuntimeError("Could not find CSRF token — are the cookies still valid?")

	    extra = {
	        "X-Requested-With": "XMLHttpRequest",
	        "X-CSRF-Token": csrf,
	        "Referer": f"https://github.com/{USERNAME}?tab=stars",
	    }
	    lists, after = [], None
	    while True:
	        data = _post_json(
	            GRAPHQL_WEB,
	            {"query": LISTS_QUERY, "variables": {"login": USERNAME, "after": after}},
	            extra=extra,
	        )
	        if "errors" in data:
	            raise RuntimeError(data["errors"])
	        nodes = data["data"]["user"]["lists"]
	        lists.extend(nodes["nodes"])
	        if not nodes["pageInfo"]["hasNextPage"]:
	            break
	        after = nodes["pageInfo"]["endCursor"]
	    return lists


	# ---------------------------------------------------------------------------
	# Strategy C: HTML scraping fallback
	# ---------------------------------------------------------------------------

	def _parse_lists_from_html(html):
	    """Return the list slugs linked from the stars page, in page order.

	    Only slugs are extracted (the text after ``/stars/<user>/lists/`` in
	    each matching ``href``); display names are not parsed.  A slug linked
	    more than once is returned once.
	    """
	    # GitHub renders list links like: href="/stars/yucer/lists/my-list"
	    # The username comes from the command line, so escape it before
	    # embedding it in the pattern.
	    pattern = rf'href="/stars/{re.escape(USERNAME)}/lists/([^"]+)"'
	    # dict.fromkeys drops repeats while keeping first-seen order.
	    return list(dict.fromkeys(re.findall(pattern, html)))


	def _parse_repos_from_html(html):
	repos = []
	# Each starred repo block contains an <h3> with the repo link
	blocks = re.findall(
	r'<h3[^>]>.?<a href="/([^"]+)"[^>]>.?</h3>',
	html, re.DOTALL
	)
	for path in blocks:
	repos.append({
	"nameWithOwner": path,
	"url": f"https://github.com/{path}",
	"description": "",
	"stargazerCount": 0,
	"primaryLanguage": None,
	})
	# Try richer extraction via data attributes
	for m in re.finditer(
	r'<a[^>]+href="/([^"]+)"[^>]itemprop="name codeRepository"[^>]>',
	html
	):
	path = m.group(1).strip("/")
	repos.append({
	"nameWithOwner": path,
	"url": f"https://github.com/{path}",
	"description": "",
	"stargazerCount": 0,
	"primaryLanguage": None,
	})
	# Deduplicate
	seen, unique = set(), []
	for r in repos:
	if r["nameWithOwner"] not in seen:
	seen.add(r["nameWithOwner"])
	unique.append(r)
	return unique


	def _fetch_list_repos_html(slug):
	    """Download the page of the list *slug* and return its parsed repos.

	    A single URL is requested; the result is whatever
	    ``_parse_repos_from_html`` extracts from that one response.
	    """
	    html = _get(f"https://github.com/stars/{USERNAME}/lists/{slug}")
	    return _parse_repos_from_html(html)


	def fetch_via_scraping():
	    """Build the star lists by scraping HTML instead of using GraphQL.

	    The stars page supplies the list slugs; each list page is then fetched
	    and parsed.  The result has the same shape as the GraphQL list nodes,
	    with the slug standing in for the name and an empty description.

	    Raises:
	        RuntimeError: if no list links are found on the stars page.
	    """
	    html = _get(f"https://github.com/{USERNAME}?tab=stars")
	    raw_lists = _parse_lists_from_html(html)
	    if not raw_lists:
	        raise RuntimeError("Could not find any lists in HTML — page structure may have changed")

	    results = []
	    for slug in raw_lists:
	        print(f" Scraping list: {slug} ...")
	        repos = _fetch_list_repos_html(slug)
	        results.append({
	            "name": slug,
	            "slug": slug,
	            "description": "",
	            "items": {"nodes": repos},
	        })
	    return results


	# ---------------------------------------------------------------------------
	# Markdown writer
	# ---------------------------------------------------------------------------

	def slugify(name):
	    """Turn *name* into a file-name-friendly slug.

	    The name is lower-cased and stripped of surrounding whitespace, then
	    every character that is neither a word character nor ``-`` is replaced
	    with ``_``.
	    """
	    return re.sub(r"[^\w\-]", "_", name.lower().strip())


	def write_markdown(star_list):
	    """Write one star list to ``<OUTPUT_DIR>/<slugified name>.md``.

	    *star_list* is a mapping with ``name``, optional ``description`` and
	    ``items.nodes`` (a list of repository mappings).  The file gets a
	    title, the description if any, and one bullet per repository with
	    optional description, language and star count.

	    The file is always written as UTF-8 because the output contains
	    non-ASCII characters ("—", "★").  Null or empty item nodes are skipped
	    rather than crashing or producing an empty ``- []()`` bullet.
	    """
	    os.makedirs(OUTPUT_DIR, exist_ok=True)
	    name = star_list["name"]
	    desc = star_list.get("description") or ""
	    repos = [repo for repo in star_list["items"]["nodes"] if repo]

	    lines = [f"# {name}\n\n"]
	    if desc:
	        lines.append(f"{desc}\n\n")
	    for repo in repos:
	        url = repo.get("url", "")
	        rname = repo.get("nameWithOwner", url)
	        rdesc = repo.get("description") or ""
	        stars = repo.get("stargazerCount", 0)
	        lang = (repo.get("primaryLanguage") or {}).get("name", "")

	        entry = f"- [{rname}]({url})"
	        if rdesc:
	            entry += f" — {rdesc}"
	        meta = [x for x in [lang, f"★ {stars}" if stars else ""] if x]
	        if meta:
	            # Join outside the f-string: a backslash inside an f-string
	            # expression is a syntax error before Python 3.12.
	            entry += " `" + "  |  ".join(meta) + "`"
	        lines.append(entry + "\n")

	    path = os.path.join(OUTPUT_DIR, f"{slugify(name)}.md")
	    with open(path, "w", encoding="utf-8") as f:
	        f.writelines(lines)

	    print(f" -> {path} ({len(repos)} repos)")


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------

	def main():
	    """Fetch the star lists of ``USERNAME`` and write one markdown file each.

	    Strategies are tried in order: the public GraphQL API when
	    ``GITHUB_TOKEN`` is set, then the web GraphQL endpoint when
	    ``GITHUB_COOKIES`` is set, then HTML scraping if that cookie-based call
	    fails.  The process exits with status 1 when no credentials are given
	    or when every available strategy fails.  An account that simply has no
	    lists is reported and is not an error.
	    """
	    if not COOKIE_STRING and not GITHUB_TOKEN:
	        print("ERROR: Provide authentication via one of:")
	        print(" GITHUB_COOKIES='...' (copy cookie string from browser DevTools)")
	        print(" GITHUB_TOKEN='ghp_...' (personal access token)")
	        sys.exit(1)

	    print(f"Fetching star lists for @{USERNAME} ...")

	    # None means "nothing fetched yet"; an empty list means "no lists".
	    star_lists = None

	    if GITHUB_TOKEN:
	        print(" Using PAT via public GraphQL API ...")
	        try:
	            star_lists = fetch_via_pat()
	        except Exception as e:
	            print(f" PAT method failed: {e}")

	    if star_lists is None and COOKIE_STRING:
	        print(" Using session cookies via internal GraphQL API ...")
	        try:
	            star_lists = fetch_via_cookies()
	        except Exception as e:
	            print(f" Internal GraphQL failed: {e}")
	            print(" Falling back to HTML scraping ...")
	            try:
	                star_lists = fetch_via_scraping()
	            except Exception as e2:
	                print(f" HTML scraping also failed: {e2}")
	                sys.exit(1)

	    if star_lists is None:
	        # Reached when the PAT method failed and no cookies were supplied.
	        # Report the failure instead of claiming the account has no lists.
	        print("ERROR: Could not fetch star lists with the provided credentials.")
	        sys.exit(1)

	    if not star_lists:
	        print("No star lists found.")
	        return

	    print(f"\nFound {len(star_lists)} list(s). Writing to '{OUTPUT_DIR}/':\n")
	    for sl in star_lists:
	        write_markdown(sl)

	    print("\nDone.")


	if __name__ == "__main__":
	    main()
No results found