ivorpad · June 18, 2026 11:27
diff --git a/gpt-exporter.py b/gpt-exporter.py
 #!/usr/bin/env python3
 from __future__ import annotations

 import argparse
 import base64
 import json
 import mimetypes
 import os
 import re
 import shlex
 import sys
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 from urllib.error import HTTPError, URLError
 from urllib.parse import quote, unquote, urljoin, urlparse, urlunparse
 from urllib.request import HTTPRedirectHandler, Request, build_opener


 # Name of the output directory; presumably the CLI default — TODO confirm
 # (it is not referenced in the visible part of the file).
 DEFAULT_OUT_DIR = "gpt-export-output"

 # URL path templates expanded by build_candidate_urls(), tried in this order.
 # Templates containing "{gizmo_id}" are skipped when no gizmo id is known.
 CANDIDATE_ENDPOINTS = [
    "/backend-api/files/download/{file_id}?gizmo_id={gizmo_id}&download_intent=true",
    "/backend-api/files/download/{file_id}?gizmo_id={gizmo_id}",
    "/backend-api/files/download/{file_id}?download_intent=true",
    "/backend-api/files/download/{file_id}",
    "/backend-api/files/{file_id}/download?gizmo_id={gizmo_id}",
    "/backend-api/files/{file_id}/content?gizmo_id={gizmo_id}",
    "/backend-api/files/{file_id}/download",
    "/backend-api/files/{file_id}/content",
    "/backend-api/files/{file_id}",
    "/backend-api/files/{file_id}/raw",
    "/backend-api/gizmos/{gizmo_id}/files/{file_id}/download",
    "/backend-api/gizmos/{gizmo_id}/files/{file_id}/content",
 ]

 # URL path templates expanded by build_gizmo_export_urls().
 GIZMO_EXPORT_ENDPOINTS = [
    "/backend-api/gizmos/{gizmo_id}/export",
 ]

 # Lower-cased request header names that request_headers_from_entry() drops
 # when copying headers out of a HAR entry. Despite the name, the set also
 # holds HTTP/2 pseudo-headers (":authority", ...) and other per-request
 # fields such as "origin" and "referer".
 HOP_BY_HOP_HEADERS = {
    ":authority",
    ":method",
    ":path",
    ":scheme",
    "accept-encoding",
    "connection",
    "content-length",
    "host",
    "origin",
    "referer",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
 }

 # The only headers (lower-cased names) that headers_for_url() forwards to a
 # host that is neither the auth host nor one of its subdomains.
 EXTERNAL_REDIRECT_HEADER_ALLOWLIST = {
    "accept",
    "accept-language",
    "user-agent",
 }

 # Keys (compared lower-cased) that is_sensitive_key() always treats as
 # secret; sanitize_for_disk() replaces their values with "[REDACTED]".
 SENSITIVE_KEY_EXACT = {
    "api_key",
    "apikey",
    "authorization",
    "cookie",
    "credentials",
    "csrf",
    "jwt",
    "password",
    "secret",
    "session",
    "session_id",
    "set-cookie",
    "signature",
    "sig",
    "token",
 }

 # Keys (compared lower-cased) whose string values are treated as URLs:
 # sanitize_for_disk() strips their query/fragment via redact_url(), and
 # find_download_url() searches them for an http(s) download link.
 SIGNED_URL_KEYS = {
    "content_url",
    "download_url",
    "file_url",
    "href",
    "signed_url",
    "url",
 }


 @dataclass(frozen=True)
 class JsonPayload:
    """One HAR entry whose response body parsed as JSON (see extract_json_payloads)."""

    # Position of the entry within the list returned by har_entries().
    index: int
    # Request method from the HAR entry; "GET" when the entry has none.
    method: str
    # Full request URL as recorded in the HAR.
    url: str
    # Network location (netloc) and path components of ``url``.
    host: str
    path: str
    # Response status from the HAR entry; 0 when missing.
    status: int
    # The response content's "mimeType" value; "" when missing.
    content_type: str
    # The decoded JSON document (any JSON value, not only objects).
    data: Any


 @dataclass(frozen=True)
 class FetchResult:
    """Outcome of one fetch_url() call, successful or not."""

    # HTTP status code; None when no HTTP response was obtained
    # (URLError, or the redirect limit was exceeded).
    status: int | None
    # Response headers with lower-cased names; empty when there was no response.
    headers: dict[str, str]
    # Raw response body (also populated for HTTP error responses).
    body: bytes
    # URL of the last request made, after any redirects that were followed.
    final_url: str
    # Human-readable failure description; None on success.
    error: str | None = None


 class NoRedirectHandler(HTTPRedirectHandler):
    """Redirect handler that never follows a redirect.

    fetch_url() installs it so that each 3xx response surfaces as an
    HTTPError and the next hop can be requested with headers chosen for
    that hop's host.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):  # type: ignore[no-untyped-def]
        # Returning None declines to build a follow-up request.
        return None


 def load_har(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return it when it looks like a HAR file.

    Raises:
        ValueError: The top-level JSON value is not an object with a "log" key.
    """
    with path.open("r", encoding="utf-8") as handle:
        document = json.load(handle)
    if isinstance(document, dict) and "log" in document:
        return document
    raise ValueError(f"{path} does not look like a HAR file")


 def har_entries(har: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict-shaped items of ``har["log"]["entries"]``.

    Any structural surprise (missing or non-dict "log", missing or non-list
    "entries") yields an empty list instead of raising, and non-dict items
    inside the list are dropped.
    """
    log = har.get("log") if isinstance(har, dict) else None
    # "log" can legally be present but null/non-object in a damaged capture.
    if not isinstance(log, dict):
        return []
    entries = log.get("entries")
    if not isinstance(entries, list):
        return []
    return [entry for entry in entries if isinstance(entry, dict)]


 def decode_har_response_text(content: dict[str, Any]) -> str | None:
    """Return the response body text stored in a HAR ``content`` object.

    Args:
        content: The ``response.content`` dict of a HAR entry.

    Returns:
        The "text" value, base64-decoded (as UTF-8, undecodable bytes
        replaced) when "encoding" is "base64"; None when "text" is not a
        string or the base64 payload is malformed.
    """
    text = content.get("text")
    if not isinstance(text, str):
        return None
    if content.get("encoding") != "base64":
        return text
    try:
        raw = base64.b64decode(text)
    except ValueError:
        # binascii.Error (bad padding) and the non-ASCII input error are both
        # ValueError subclasses; anything else is a real bug and should surface.
        return None
    return raw.decode("utf-8", "replace")


 def extract_json_payloads(har: dict[str, Any]) -> list[JsonPayload]:
    """Collect every HAR entry whose response body parses as a JSON object or array.

    Entries are skipped (never fatal) when the request/response/content are
    not dicts, the URL is missing or unparsable, the body is empty, does not
    start with "{" or "[", or fails to parse.

    Returns:
        One JsonPayload per matching entry, in HAR order; ``index`` is the
        entry's position in har_entries(har).
    """
    payloads: list[JsonPayload] = []
    for index, entry in enumerate(har_entries(har)):
        request = entry.get("request", {})
        response = entry.get("response", {})
        if not isinstance(request, dict) or not isinstance(response, dict):
            continue
        url = request.get("url")
        if not isinstance(url, str):
            continue
        try:
            parsed = urlparse(url)
        except ValueError:
            # urlparse rejects some malformed authorities (e.g. a bad IPv6
            # literal); one broken entry must not abort the whole export.
            continue
        content = response.get("content", {})
        if not isinstance(content, dict):
            continue
        text = decode_har_response_text(content)
        if not text or not text.lstrip().startswith(("{", "[")):
            continue
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            continue
        try:
            status = int(response.get("status") or 0)
        except (TypeError, ValueError):
            # A hand-edited or damaged HAR may carry a non-numeric status.
            status = 0
        payloads.append(
            JsonPayload(
                index=index,
                method=str(request.get("method") or "GET"),
                url=url,
                host=parsed.netloc,
                path=parsed.path,
                status=status,
                content_type=str(content.get("mimeType") or ""),
                data=data,
            )
        )
    return payloads


 def request_headers_from_entry(entry: dict[str, Any]) -> dict[str, str]:
    """Build replayable request headers from one HAR entry.

    Headers listed in HOP_BY_HOP_HEADERS are dropped and CR/LF is removed
    from every value. When the entry has no Cookie header, one is rebuilt
    from the entry's "cookies" list. Default "accept" and "user-agent"
    headers are added only when the entry supplied neither under any
    capitalisation.

    Args:
        entry: A HAR entry dict; a missing or malformed "request" is
            treated as having no headers and no cookies.

    Returns:
        A new header-name to value mapping.
    """
    request = entry.get("request", {})
    if not isinstance(request, dict):
        request = {}

    def single_line(text: str) -> str:
        # Header values must stay on one line; drop CR/LF taken from the HAR.
        return text.replace("\r", "").replace("\n", "")

    headers: dict[str, str] = {}
    raw_headers = request.get("headers")
    for header in raw_headers if isinstance(raw_headers, list) else []:
        if not isinstance(header, dict):
            continue
        name = str(header.get("name") or "")
        if not name or name.lower() in HOP_BY_HOP_HEADERS:
            continue
        headers[name] = single_line(str(header.get("value") or ""))

    present = {name.lower() for name in headers}
    if "cookie" not in present:
        raw_cookies = request.get("cookies")
        cookie_parts = []
        for cookie in raw_cookies if isinstance(raw_cookies, list) else []:
            if not isinstance(cookie, dict):
                continue
            name = str(cookie.get("name") or "")
            value = str(cookie.get("value") or "")
            if name:
                cookie_parts.append(f"{name}={value}")
        if cookie_parts:
            headers["Cookie"] = single_line("; ".join(cookie_parts))

    # Compare case-insensitively: urllib's Request stores headers under a
    # normalised name, so adding "accept" next to the HAR's "Accept" would
    # silently replace the captured value with the default.
    if "accept" not in present:
        headers["accept"] = "*/*"
    if "user-agent" not in present:
        headers["user-agent"] = "Mozilla/5.0"
    return headers


 def has_auth_material(headers: dict[str, str]) -> bool:
    """Tell whether *headers* carries an Authorization or Cookie header (any case)."""
    for name in headers:
        if name.lower() in ("authorization", "cookie"):
            return True
    return False


 def merge_runtime_auth_headers(
    headers: dict[str, str],
    cookie_env: str | None = None,
    authorization_env: str | None = None,
    curl_file: Path | None = None,
 ) -> dict[str, str]:
    """Overlay credentials supplied at run time onto HAR-derived headers.

    Sources are applied in this order, later ones winning: the cURL file,
    the cookie environment variable, the authorization environment
    variable. Unset or empty environment variables are ignored.

    Args:
        headers: Base headers; not modified.
        cookie_env: Name of an environment variable holding a Cookie value.
        authorization_env: Name of an environment variable holding a token
            or a full "Bearer ..." value.
        curl_file: Path to a saved cURL command to take Cookie/Authorization from.

    Returns:
        A new dict in which each overridden header appears exactly once,
        regardless of how the base headers capitalised it.
    """
    merged = dict(headers)

    def override(name: str, value: str) -> None:
        # Remove differently-cased copies first ("cookie" vs "Cookie") so the
        # result never depends on which duplicate the HTTP layer keeps.
        for existing in [key for key in merged if key.lower() == name.lower()]:
            del merged[existing]
        merged[name] = value.replace("\r", "").replace("\n", "")

    if curl_file:
        for name, value in auth_headers_from_curl_file(curl_file).items():
            override(name, value)
    if cookie_env:
        cookie_value = os.environ.get(cookie_env)
        if cookie_value:
            override("Cookie", cookie_value)
    if authorization_env:
        authorization_value = os.environ.get(authorization_env)
        if authorization_value:
            # Same normalisation as the cURL path: trims whitespace before
            # deciding whether a "Bearer " prefix is already present.
            normalized = normalize_authorization_header(authorization_value)
            if normalized:
                override("Authorization", normalized)
    return merged


 def normalize_authorization_header(value: str) -> str:
    """Trim *value* and prefix it with "Bearer " unless it already starts that way.

    An empty (or whitespace-only) value comes back as the empty string.
    """
    token = value.strip()
    if token and not token.lower().startswith("bearer "):
        return f"Bearer {token}"
    return token


 def parse_header_line(value: str) -> tuple[str, str] | None:
    """Split a "Name: value" line at the first colon.

    Returns:
        ``(name, value)`` with surrounding whitespace removed, or None when
        there is no colon or the name is empty.
    """
    raw_name, separator, raw_value = value.partition(":")
    if not separator:
        return None
    name = raw_name.strip()
    if not name:
        return None
    return name, raw_value.strip()


 def auth_headers_from_curl_text(text: str) -> dict[str, str]:
    """Pull Authorization and Cookie values out of a cURL command line.

    Recognised forms are ``-H``/``--header`` followed by a header line,
    ``-HName: value`` written as one token, and ``-b``/``--cookie``
    followed by a cookie string. The argument of ``--cookie-jar`` is
    consumed but ignored. Later occurrences overwrite earlier ones.

    Raises:
        ValueError: The text cannot be tokenised as a shell command.
    """
    try:
        tokens = shlex.split(text)
    except ValueError as error:
        raise ValueError(f"could not parse cURL file: {error}") from error

    found: dict[str, str] = {}

    def remember(header_line: str) -> None:
        # Keep only the two credential-bearing headers; ignore everything else.
        parsed = parse_header_line(header_line)
        if not parsed:
            return
        name, value = parsed
        lowered = name.lower()
        if lowered == "authorization":
            found["Authorization"] = normalize_authorization_header(value)
        elif lowered == "cookie":
            found["Cookie"] = value

    position = 0
    total = len(tokens)
    while position < total:
        token = tokens[position]
        has_argument = position + 1 < total
        if has_argument and token in ("-H", "--header"):
            remember(tokens[position + 1])
            position += 2
        elif token.startswith("-H") and token != "-H":
            # Attached form: the header line follows "-H" inside one token.
            remember(token[2:])
            position += 1
        elif has_argument and token in ("-b", "--cookie", "--cookie-jar"):
            if token != "--cookie-jar":
                found["Cookie"] = tokens[position + 1].strip()
            position += 2
        else:
            position += 1
    return found


 def auth_headers_from_curl_file(path: Path) -> dict[str, str]:
    """Read *path* as UTF-8 and extract auth headers from the cURL command inside."""
    return auth_headers_from_curl_text(path.read_text(encoding="utf-8"))


 def score_auth_entry(entry: dict[str, Any]) -> int:
    """Rank a HAR entry as a source of auth headers; higher is better.

    Returns -1 when the entry has no dict request or its URL path does not
    contain "backend-api". Otherwise the score is 20 for a netloc containing
    "chatgpt.com" (10 for any other host), plus 3 for GET, plus 100 for a
    "/backend-api/gizmos/" path that does not contain "/snorlax/".
    """
    request = entry.get("request", {})
    if not isinstance(request, dict):
        return -1
    parsed = urlparse(str(request.get("url") or ""))
    path = parsed.path
    if "backend-api" not in path:
        return -1
    score = 20 if "chatgpt.com" in parsed.netloc else 10
    if str(request.get("method") or "") == "GET":
        score += 3
    if "/backend-api/gizmos/" in path and "/snorlax/" not in path:
        score += 100
    return score


 def best_auth_entry(har: dict[str, Any]) -> dict[str, Any] | None:
    """Pick the HAR entry with the highest score_auth_entry() value.

    Entries scoring below zero are ignored; among equal scores the entry
    that appears later in the HAR wins. Returns None when nothing qualifies.
    """
    winner: dict[str, Any] | None = None
    winner_rank: tuple[int, int] | None = None
    for position, entry in enumerate(har_entries(har)):
        score = score_auth_entry(entry)
        if score < 0:
            continue
        rank = (score, position)
        if winner_rank is None or rank > winner_rank:
            winner, winner_rank = entry, rank
    return winner


 def auth_origin(entry: dict[str, Any] | None) -> tuple[str, str]:
    """Return the ``(scheme, netloc)`` of an entry's request URL.

    Falls back to ``("https", "chatgpt.com")`` as a whole when there is no
    entry, and per component when the URL lacks a scheme or a netloc.
    """
    if not entry:
        return "https", "chatgpt.com"
    request = entry.get("request", {})
    url = "https://chatgpt.com"
    if isinstance(request, dict):
        url = str(request.get("url") or "https://chatgpt.com")
    parts = urlparse(url)
    scheme = parts.scheme or "https"
    netloc = parts.netloc or "chatgpt.com"
    return scheme, netloc


 def is_same_or_subdomain(host: str, allowed_host: str) -> bool:
    """Tell whether *host* is *allowed_host* or one of its subdomains.

    Both arguments may be bare hostnames or URL network locations; any
    ``userinfo@`` prefix, port, and IPv6 brackets are ignored and the
    comparison is case-insensitive. An empty host on either side never
    matches, so credentials are not released on a parse failure.
    """

    def bare_host(netloc: str) -> str:
        # The real host is what follows the last "@"; comparing the raw
        # netloc would let "allowed.com:x@other.host" pass as allowed.com.
        candidate = netloc.rpartition("@")[2].lower()
        if candidate.startswith("["):
            # Bracketed IPv6 literal: the colons inside are not a port separator.
            return candidate[1:].partition("]")[0]
        return candidate.split(":", 1)[0]

    normalized_host = bare_host(host)
    normalized_allowed = bare_host(allowed_host)
    if not normalized_host or not normalized_allowed:
        return False
    return normalized_host == normalized_allowed or normalized_host.endswith("." + normalized_allowed)


 def headers_for_url(url: str, auth_headers: dict[str, str], auth_host: str) -> dict[str, str]:
    """Choose which of *auth_headers* may be sent to *url*.

    Returns:
        A copy of all headers when the URL's host is *auth_host* or one of
        its subdomains; otherwise only the headers whose lower-cased name is
        in EXTERNAL_REDIRECT_HEADER_ALLOWLIST, so cookies and bearer tokens
        never leave the authenticated origin.
    """
    # Judge the host that follows any "user:pass@" prefix; the userinfo part
    # of a URL is attacker-controlled text and must not count as the host.
    host = urlparse(url).netloc.rpartition("@")[2]
    if is_same_or_subdomain(host, auth_host):
        return dict(auth_headers)
    return {
        name: value
        for name, value in auth_headers.items()
        if name.lower() in EXTERNAL_REDIRECT_HEADER_ALLOWLIST
    }


 def fetch_url(
    url: str,
    auth_headers: dict[str, str],
    auth_host: str,
    timeout: int,
    max_redirects: int = 5,
 ) -> FetchResult:
    """GET *url*, following redirects by hand so headers are re-chosen per hop.

    Each hop's headers come from headers_for_url(), which withholds
    credentials from hosts outside *auth_host*. Only http and https URLs are
    requested; a redirect to any other scheme ends the fetch with an error.

    Args:
        url: Address to fetch.
        auth_headers: Full header set for the authenticated origin.
        auth_host: Host (optionally with port) that may receive all headers.
        timeout: Per-request timeout in seconds, passed to the opener.
        max_redirects: Number of redirects followed before giving up.

    Returns:
        A FetchResult. Network and URL failures never raise; they are
        reported with ``status=None`` and a message in ``error``. HTTP error
        responses carry their status, headers and body.
    """
    # Function-scope import: this is the only place the name is needed.
    from http.client import HTTPException

    def failure(at_url: str, message: str) -> FetchResult:
        return FetchResult(status=None, headers={}, body=b"", final_url=at_url, error=message)

    opener = build_opener(NoRedirectHandler)
    current_url = url
    for _ in range(max_redirects + 1):
        try:
            scheme = urlparse(current_url).scheme.lower()
            if scheme not in {"http", "https"}:
                # The default opener also serves file:// and ftp://; a hostile
                # Location header must not be able to steer us there.
                return failure(current_url, f"refusing non-HTTP URL scheme: {scheme or '(none)'}")
            headers = headers_for_url(current_url, auth_headers, auth_host)
            request = Request(current_url, headers=headers, method="GET")
            with opener.open(request, timeout=timeout) as response:
                body = response.read()
                return FetchResult(
                    status=response.status,
                    headers={key.lower(): value for key, value in response.headers.items()},
                    body=body,
                    final_url=response.geturl(),
                )
        except HTTPError as error:
            # NoRedirectHandler makes every 3xx arrive here.
            location = error.headers.get("Location")
            if error.code in {301, 302, 303, 307, 308} and location:
                current_url = urljoin(current_url, location)
                continue
            body = error.read()
            return FetchResult(
                status=error.code,
                headers={key.lower(): value for key, value in error.headers.items()},
                body=body,
                final_url=current_url,
                error=str(error),
            )
        except URLError as error:
            return failure(current_url, str(error.reason))
        except (HTTPException, OSError, ValueError) as error:
            # Read timeouts, reset connections, truncated bodies and invalid
            # URLs are not wrapped in URLError; report them the same way so
            # one bad candidate does not abort the remaining attempts.
            return failure(current_url, str(error) or type(error).__name__)
    return failure(current_url, f"too many redirects after {max_redirects}")


 def is_gizmo_payload(data: Any) -> bool:
    """Tell whether a JSON document looks like a GPT ("gizmo") description.

    It must be a dict with a dict under "gizmo", and show at least one of:
    a truthy gizmo id, an "instructions" or "display" key inside the gizmo,
    or a top-level "files" key.
    """
    if not isinstance(data, dict):
        return False
    gizmo = data.get("gizmo")
    if not isinstance(gizmo, dict):
        return False
    markers = (
        bool(gizmo.get("id")),
        "instructions" in gizmo,
        "display" in gizmo,
        "files" in data,
    )
    return any(markers)


 def group_gizmo_payloads(payloads: list[JsonPayload]) -> dict[str, list[JsonPayload]]:
    """Bucket gizmo payloads by gizmo id, keeping HAR order inside each bucket.

    Payloads that fail is_gizmo_payload() are ignored. A gizmo without an id
    is filed under ``unknown-entry-<payload index>``.
    """
    buckets: dict[str, list[JsonPayload]] = {}
    for payload in payloads:
        document = payload.data
        if not is_gizmo_payload(document):
            continue
        gizmo = document.get("gizmo") if isinstance(document, dict) else {}
        bucket_key = str(gizmo.get("id") or f"unknown-entry-{payload.index}")
        if bucket_key not in buckets:
            buckets[bucket_key] = []
        buckets[bucket_key].append(payload)
    return buckets


 def parse_datetime(value: Any) -> float:
    """Convert an ISO-8601 string to a POSIX timestamp, or 0.0 if that fails.

    "Z" is rewritten to "+00:00" before parsing. Non-string input and
    strings that do not parse both give 0.0.
    """
    if isinstance(value, str):
        try:
            return datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp()
        except ValueError:
            pass
    return 0.0


 def version_value(value: Any) -> int:
    """Coerce a version field to int for ordering; -1 when it is not usable.

    Accepts anything ``int()`` accepts. Missing, non-numeric, NaN and
    infinite values all map to -1 so they sort below every real version.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")); the json module parses the
        # non-standard literal "Infinity" into exactly that float.
        return -1


 def latest_payload(payloads: list[JsonPayload]) -> JsonPayload:
    """Return the newest snapshot among *payloads*.

    Snapshots are ordered by gizmo version, then by "version_updated_at"
    (falling back to "updated_at"), then by position in the HAR.
    """

    def rank(candidate: JsonPayload) -> tuple[int, float, int]:
        document = candidate.data
        gizmo = document.get("gizmo", {}) if isinstance(document, dict) else {}
        stamp = gizmo.get("version_updated_at") or gizmo.get("updated_at")
        return version_value(gizmo.get("version")), parse_datetime(stamp), candidate.index

    return max(payloads, key=rank)


 def is_sensitive_key(key: str) -> bool:
    """Tell whether a JSON key names a secret that must not be written to disk.

    The check is case-insensitive: an exact hit in SENSITIVE_KEY_EXACT, a
    "_token"/"-token" suffix, a "token_"/"secret_"/"credential_" prefix, or
    "authorization"/"set-cookie" anywhere in the key.
    """
    lowered = key.lower()
    return (
        lowered in SENSITIVE_KEY_EXACT
        or lowered.endswith(("_token", "-token"))
        or lowered.startswith(("token_", "secret_", "credential_"))
        or "authorization" in lowered
        or "set-cookie" in lowered
    )


 def redact_url(value: str) -> str:
    """Drop the query string and fragment from an absolute URL.

    Strings without both a scheme and a network location are returned
    unchanged.
    """
    parts = urlparse(value)
    if parts.scheme and parts.netloc:
        return parts._replace(query="", fragment="").geturl()
    return value


 def sanitize_for_disk(value: Any, parent_key: str = "") -> Any:
    """Return a copy of a JSON value that is safe to persist.

    Dict values under a sensitive key become "[REDACTED]"; strings stored
    under a URL-like key (in SIGNED_URL_KEYS or ending in "_url") lose their
    query and fragment. Lists pass their parent's key down to their items.
    Dict keys are stringified; all other values are returned as they are.
    """
    if isinstance(value, dict):
        return {
            str(key): "[REDACTED]" if is_sensitive_key(str(key)) else sanitize_for_disk(item, str(key))
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [sanitize_for_disk(item, parent_key) for item in value]
    if not isinstance(value, str):
        return value
    lowered_parent = parent_key.lower()
    is_url_field = lowered_parent.endswith("_url") or lowered_parent in SIGNED_URL_KEYS
    return redact_url(value) if is_url_field else value


 def write_json(path: Path, value: Any) -> None:
    """Write *value* to *path* as 2-space-indented UTF-8 JSON with a trailing newline.

    Missing parent directories are created; non-ASCII text is written as-is.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(value, indent=2, ensure_ascii=False)
    path.write_text(f"{rendered}\n", encoding="utf-8")


 def write_text(path: Path, value: str) -> None:
    """Write *value* to *path* as UTF-8, creating parent directories as needed."""
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(value)


 def slugify(value: str, fallback: str) -> str:
    """Reduce *value* to a filesystem-friendly slug of at most 80 characters.

    Runs of characters outside ``A-Za-z0-9._-`` become a single "-", and
    leading/trailing "-", "." and "_" are trimmed. *fallback* is returned
    when nothing is left.
    """
    collapsed = re.sub(r"[^A-Za-z0-9._-]+", "-", value.strip())
    collapsed = re.sub(r"-{2,}", "-", collapsed)
    trimmed = collapsed.strip("-._")[:80]
    return trimmed if trimmed else fallback


 def safe_filename(name: str | None, fallback: str, content_type: str | None = None) -> str:
    """Turn an untrusted file name into a single safe path component.

    The name is percent-decoded, reduced to the part after the last "/" or
    backslash, stripped of control characters, and every run of characters
    outside ``A-Za-z0-9._ -`` becomes "_". *fallback* is used when nothing
    remains. When the result has no "." and *content_type* is given, a
    guessed extension is appended. The result is capped at 180 characters.
    """
    decoded = unquote(name or "")
    basename = decoded.replace("\\", "/").rsplit("/", 1)[-1].strip()
    basename = re.sub(r"[\x00-\x1f\x7f]+", "", basename)
    basename = re.sub(r"[^A-Za-z0-9._ -]+", "_", basename).strip(" .")
    result = basename or fallback
    if content_type and "." not in result:
        mime = content_type.split(";", 1)[0].strip()
        guessed = mimetypes.guess_extension(mime)
        if guessed:
            result += guessed
    return result[:180]


 def unique_path(path: Path) -> Path:
    """Return *path*, or a "-<n>"-suffixed sibling of it, that does not exist yet.

    Suffixes are tried from 2 up to 9999, inserted before the file extension.

    Raises:
        RuntimeError: Every candidate name is already taken.
    """
    if not path.exists():
        return path
    base, extension = path.stem, path.suffix
    numbered = (path.with_name(f"{base}-{number}{extension}") for number in range(2, 10_000))
    free = next((option for option in numbered if not option.exists()), None)
    if free is None:
        raise RuntimeError(f"could not find unique filename for {path}")
    return free


 def walk_json(value: Any, path: str = "$"):
    """Yield ``(path, node)`` for *value* and everything nested inside it.

    Traversal is depth-first, parent before children. Paths look like
    ``$.key[0].other``; only dicts and lists are descended into.
    """
    yield path, value
    if isinstance(value, dict):
        children = ((f"{path}.{key}", item) for key, item in value.items())
    elif isinstance(value, list):
        children = ((f"{path}[{position}]", item) for position, item in enumerate(value))
    else:
        return
    for child_path, child in children:
        yield from walk_json(child, child_path)


 def file_id_from_dict(value: dict[str, Any]) -> str | None:
    """Extract a file id from a JSON object, if it carries one.

    A non-empty string under "file_id" wins; otherwise a string "id" is
    accepted only when it starts with "file_". Returns None when neither
    applies.
    """
    explicit = value.get("file_id")
    if isinstance(explicit, str) and explicit:
        return explicit
    generic = value.get("id")
    looks_like_file = isinstance(generic, str) and generic.startswith("file_")
    return generic if looks_like_file else None


 def extract_files(payloads: list[JsonPayload]) -> list[dict[str, Any]]:
    """Collect metadata for every file referenced anywhere in *payloads*.

    Every dict in every payload is inspected; those with a file id (see
    file_id_from_dict) contribute one record. Records for the same file id
    are merged: the first non-empty value seen for a field is kept, and
    "sources" accumulates every ``entry:<index>:<json path>`` location,
    sorted and de-duplicated.

    Returns:
        One metadata dict per distinct file id, in first-seen order.
    """

    def is_blank(item: Any) -> bool:
        # Compare by value, not by set membership: metadata fields can hold
        # dicts or lists, which are unhashable and would raise TypeError.
        return item is None or item == ""

    files_by_id: dict[str, dict[str, Any]] = {}
    for payload in payloads:
        for source_path, value in walk_json(payload.data):
            if not isinstance(value, dict):
                continue
            file_id = file_id_from_dict(value)
            if not file_id:
                continue
            file_meta = {
                "file_id": file_id,
                "name": value.get("name") or value.get("filename") or value.get("file_name"),
                "type": value.get("type") or value.get("mime_type") or value.get("content_type"),
                "size": value.get("size") or value.get("bytes") or value.get("file_size"),
                "created_at": value.get("created_at"),
                "last_modified": value.get("last_modified"),
                "location": value.get("location"),
                "library_file_id": value.get("library_file_id"),
                "metadata": value.get("metadata"),
                "file_size_tokens": value.get("file_size_tokens"),
                "sources": [f"entry:{payload.index}:{source_path}"],
            }
            existing = files_by_id.get(file_id)
            if existing is None:
                files_by_id[file_id] = file_meta
                continue
            for key, item in file_meta.items():
                if key == "sources":
                    existing[key] = sorted(set(existing.get(key, []) + item))
                elif is_blank(existing.get(key)) and not is_blank(item):
                    # Only fill gaps; never overwrite a value already recorded.
                    existing[key] = item
    return list(files_by_id.values())


 def content_type(headers: dict[str, str]) -> str:
    """Return the "content-type" entry of lower-cased *headers*, or "" if absent."""
    try:
        return headers["content-type"]
    except KeyError:
        return ""


 def looks_like_json_response(result: FetchResult) -> bool:
    """Guess whether a response is JSON.

    True when the content type mentions "application/json" or the body,
    ignoring leading whitespace, starts with "{" or "[".
    """
    if "application/json" in content_type(result.headers).lower():
        return True
    return result.body.lstrip().startswith((b"{", b"["))


 def looks_like_html_response(result: FetchResult) -> bool:
    """Guess whether a response is an HTML page.

    True when the content type mentions "text/html" or the first 512 bytes
    of the body, ignoring leading whitespace and case, start with
    "<!doctype html" or "<html".
    """
    if "text/html" in content_type(result.headers).lower():
        return True
    head = result.body[:512].lstrip().lower()
    return head.startswith((b"<!doctype html", b"<html"))


 def json_from_response(result: FetchResult) -> Any | None:
    """Parse a response body as UTF-8 JSON; return None on any failure.

    Note that a body containing the JSON literal ``null`` also yields None.
    """
    try:
        text = result.body.decode("utf-8")
        parsed = json.loads(text)
    except Exception:
        # Deliberately best-effort: callers only use this for reporting.
        return None
    return parsed


 def find_download_url(value: Any) -> str | None:
    """Return the first http(s) URL stored under a SIGNED_URL_KEYS key.

    The whole JSON structure is searched in walk_json() order; keys are
    matched case-insensitively. Returns None when no such URL exists.
    """
    for _, node in walk_json(value):
        if not isinstance(node, dict):
            continue
        for key, candidate in node.items():
            if (
                str(key).lower() in SIGNED_URL_KEYS
                and isinstance(candidate, str)
                and candidate.startswith(("https://", "http://"))
            ):
                return candidate
    return None


 def url_label(url: str) -> str:
    """Return *url* reduced to scheme, host and path, for use in reports.

    Path parameters, the query string and the fragment are removed.
    """
    parts = urlparse(url)
    return parts._replace(params="", query="", fragment="").geturl()


 def build_candidate_urls(
    scheme: str,
    host: str,
    file_id: str,
    gizmo_id: str | None,
 ) -> list[str]:
    """Expand CANDIDATE_ENDPOINTS into concrete download URLs for one file.

    Ids are fully percent-encoded. Templates that need a gizmo id are left
    out when *gizmo_id* is empty. Template order is kept and duplicates are
    removed.
    """
    file_part = quote(file_id, safe="")
    gizmo_part = quote(gizmo_id or "", safe="")
    origin = f"{scheme}://{host}"
    # A dict doubles as an insertion-ordered set here.
    ordered: dict[str, None] = {}
    for template in CANDIDATE_ENDPOINTS:
        if not gizmo_id and "{gizmo_id}" in template:
            continue
        ordered.setdefault(origin + template.format(file_id=file_part, gizmo_id=gizmo_part), None)
    return list(ordered)


 def save_file_response(result: FetchResult, file_meta: dict[str, Any], out_dir: Path) -> str:
    """Write a downloaded file body into *out_dir* and return the path used.

    The file name comes from the metadata "name" (sanitised), falling back
    to ``<file_id>.bin``; the metadata "type", or else the response content
    type, supplies an extension when the name has none. An existing file is
    never overwritten.
    """
    declared_type = str(file_meta.get("type") or content_type(result.headers) or "")
    filename = safe_filename(
        str(file_meta.get("name") or ""),
        fallback=f"{file_meta['file_id']}.bin",
        content_type=declared_type,
    )
    destination = unique_path(out_dir / filename)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(result.body)
    return str(destination)


 def content_disposition_filename(value: str | None) -> str | None:
    """Extract the file name from a Content-Disposition header value.

    The extended ``filename*=UTF-8''...`` parameter is preferred and
    percent-decoded; otherwise the plain ``filename=`` parameter (quoted or
    not) is used as written.

    Returns:
        The file name, or None when the header is missing or names no file.
    """
    if not value:
        return None
    # "\*" matches the literal asterisk of the extended parameter name.
    match = re.search(r"filename\*=UTF-8''([^;]+)", value, flags=re.IGNORECASE)
    if match:
        return unquote(match.group(1).strip().strip('"'))
    match = re.search(r'filename="?([^";]+)"?', value, flags=re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return None


 def save_gizmo_export_response(result: FetchResult, gizmo_id: str, out_dir: Path) -> str:
    """Write a GPT export archive into *out_dir* and return the path used.

    The name is taken from the Content-Disposition header when present,
    otherwise ``<gizmo_id>-export.zip``. A name that still has no extension
    gets ".zip". An existing file is never overwritten.
    """
    suggested = content_disposition_filename(result.headers.get("content-disposition"))
    filename = safe_filename(
        suggested,
        fallback=f"{gizmo_id}-export.zip",
        content_type=content_type(result.headers),
    )
    if "." not in filename:
        filename = f"{filename}.zip"
    destination = unique_path(out_dir / filename)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(result.body)
    return str(destination)


 def build_gizmo_export_urls(scheme: str, host: str, gizmo_id: str) -> list[str]:
    """Expand GIZMO_EXPORT_ENDPOINTS into concrete URLs for one GPT.

    The gizmo id is fully percent-encoded; template order is kept.
    """
    gizmo_part = quote(gizmo_id, safe="")
    urls: list[str] = []
    for template in GIZMO_EXPORT_ENDPOINTS:
        urls.append(f"{scheme}://{host}{template.format(gizmo_id=gizmo_part)}")
    return urls


 def download_gizmo_export(
    gizmo_id: str,
    scheme: str,
    host: str,
    auth_headers: dict[str, str],
    auth_host: str,
    out_dir: Path,
    timeout: int,
 ) -> dict[str, Any]:
    """Try each export endpoint for a GPT and save the first usable archive.

    A response is usable when it has status 200, a non-empty body, and looks
    like neither JSON nor HTML.

    Returns:
        A report dict with "gizmo_id", "saved", "saved_path" and "attempts".
        Each attempt records the query-stripped URL, status, content type,
        final URL and error, plus a sanitised "json_response" when the
        response looked like JSON.
    """
    attempts: list[dict[str, Any]] = []
    report: dict[str, Any] = {
        "gizmo_id": gizmo_id,
        "saved": False,
        "saved_path": None,
        "attempts": attempts,
    }
    for url in build_gizmo_export_urls(scheme, host, gizmo_id):
        result = fetch_url(url, auth_headers, auth_host, timeout=timeout)
        is_json = looks_like_json_response(result)
        attempt = {
            "url": url_label(url),
            "status": result.status,
            "content_type": content_type(result.headers),
            "final_url": url_label(result.final_url),
            "error": result.error,
        }
        if is_json:
            attempt["json_response"] = sanitize_for_disk(json_from_response(result))
        attempts.append(attempt)
        usable = (
            result.status == 200
            and bool(result.body)
            and not is_json
            and not looks_like_html_response(result)
        )
        if not usable:
            continue
        report["saved_path"] = save_gizmo_export_response(result, gizmo_id, out_dir)
        report["saved"] = True
        break
    return report


 def download_one_file(
    file_meta: dict[str, Any],
    gizmo_id: str | None,
    scheme: str,
    host: str,
    auth_headers: dict[str, str],
    auth_host: str,
    out_dir: Path,
    timeout: int,
 ) -> dict[str, Any]:
    """Try every candidate endpoint for one file and save the first real body.

    For each URL from build_candidate_urls() the response is handled as:
    non-200 or empty -> next URL; JSON -> look for a download URL inside it
    and fetch that; HTML -> skipped; anything else -> saved as the file.

    Returns:
        A report dict with "file_id", "name", "saved", "saved_path" and
        "attempts". Attempts hold query-stripped URLs, status, content type,
        final URL and error; follow-up fetches of a URL found in a JSON
        response are marked with "via_signed_url".
    """
    report: dict[str, Any] = {
        "file_id": file_meta.get("file_id"),
        "name": file_meta.get("name"),
        "saved": False,
        "saved_path": None,
        "attempts": [],
    }
    for url in build_candidate_urls(scheme, host, str(file_meta["file_id"]), gizmo_id):
        result = fetch_url(url, auth_headers, auth_host, timeout=timeout)
        attempt = {
            "url": url_label(url),
            "status": result.status,
            "content_type": content_type(result.headers),
            "final_url": url_label(result.final_url),
            "error": result.error,
        }
        if looks_like_json_response(result):
            attempt["json_response"] = sanitize_for_disk(json_from_response(result))
        # The attempt dict is appended now and may still be annotated below;
        # the report holds a reference, so later keys show up in it.
        report["attempts"].append(attempt)
        if result.status != 200 or not result.body:
            continue
        if looks_like_json_response(result):
            # A JSON 200 is treated as an indirection: it may name the real
            # download location under one of the SIGNED_URL_KEYS.
            data = json_from_response(result)
            signed_url = find_download_url(data)
            if not signed_url:
                attempt["json_response"] = sanitize_for_disk(data)
                continue
            # fetch_url() picks headers per host, so credentials are only
            # sent if the discovered URL is on the auth host or a subdomain.
            signed_result = fetch_url(signed_url, auth_headers, auth_host, timeout=timeout)
            signed_attempt = {
                "url": url_label(signed_url),
                "status": signed_result.status,
                "content_type": content_type(signed_result.headers),
                "final_url": url_label(signed_result.final_url),
                "error": signed_result.error,
                "via_signed_url": True,
            }
            report["attempts"].append(signed_attempt)
            if signed_result.status == 200 and signed_result.body and not looks_like_html_response(signed_result):
                report["saved_path"] = save_file_response(signed_result, file_meta, out_dir)
                report["saved"] = True
                return report
            continue
        if looks_like_html_response(result):
            # An HTML 200 is not saved as file content.
            attempt["skipped"] = "html response"
            continue
        report["saved_path"] = save_file_response(result, file_meta, out_dir)
        report["saved"] = True
        return report
    return report


 def compact_gizmo_context(payload: JsonPayload, files: list[dict[str, Any]]) -> dict[str, Any]:
    """Condense a gizmo payload into the fields worth keeping for re-creation.

    Args:
        payload: Payload whose ``data`` holds the gizmo document.
        files: File metadata to embed under "files" as given.

    Returns:
        A flat dict of identity, instructions, display, tool, sharing and
        version fields. Fields missing from the payload are None. A payload
        whose "gizmo" or "display" is null or not an object is treated as
        empty rather than raising.
    """
    data = payload.data if isinstance(payload.data, dict) else {}
    gizmo = data.get("gizmo")
    if not isinstance(gizmo, dict):
        gizmo = {}
    # The key may be present but null, so a .get() default is not enough.
    display = gizmo.get("display")
    if not isinstance(display, dict):
        display = {}
    return {
        "id": gizmo.get("id"),
        "name": display.get("name"),
        "description": display.get("description"),
        "instructions": gizmo.get("instructions"),
        "prompt_starters": display.get("prompt_starters"),
        "model": gizmo.get("model") or gizmo.get("default_model"),
        "voice": gizmo.get("voice"),
        "tools": data.get("tools"),
        "files": files,
        "product_features": data.get("product_features"),
        "permissions": gizmo.get("current_user_permission"),
        "sharing": gizmo.get("sharing"),
        "sharing_targets": gizmo.get("sharing_targets"),
        "created_at": gizmo.get("created_at"),
        "updated_at": gizmo.get("updated_at"),
        "version": gizmo.get("version"),
        "version_created_at": gizmo.get("version_created_at"),
        "version_updated_at": gizmo.get("version_updated_at"),
    }


 def render_summary_markdown(payload: JsonPayload, files: list[dict[str, Any]]) -> str:
    """Render a human-readable Markdown summary of one GPT.

    The document has a title (display name, else gizmo id, else "GPT"), a
    bullet list of id/version/dates/export permission, optional Description,
    Instructions and Prompt Starters sections, and a Knowledge Files table
    (or a note that no files were found).

    A payload whose "gizmo", "display" or "current_user_permission" is null
    or not an object is treated as empty. Table cells have "|" escaped and
    line breaks flattened so file metadata cannot break the table layout.
    """
    data = payload.data if isinstance(payload.data, dict) else {}
    gizmo = data.get("gizmo")
    if not isinstance(gizmo, dict):
        gizmo = {}
    display = gizmo.get("display")
    if not isinstance(display, dict):
        display = {}
    permission = gizmo.get("current_user_permission")
    if not isinstance(permission, dict):
        permission = {}

    def cell(text: Any) -> str:
        # A raw "|" or a line break would split the Markdown table row.
        return str(text or "").replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    name = display.get("name") or gizmo.get("id") or "GPT"
    lines = [
        f"# {name}",
        "",
        f"- ID: `{gizmo.get('id', '')}`",
        f"- Version: `{gizmo.get('version', '')}`",
        f"- Created: `{gizmo.get('created_at', '')}`",
        f"- Updated: `{gizmo.get('updated_at', '')}`",
        f"- Can export: `{permission.get('can_export', '')}`",
        "",
    ]
    description = display.get("description")
    if description:
        lines.extend(["## Description", "", str(description), ""])
    instructions = gizmo.get("instructions")
    if instructions:
        lines.extend(["## Instructions", "", str(instructions).rstrip(), ""])
    starters = display.get("prompt_starters")
    if starters:
        lines.extend(["## Prompt Starters", ""])
        lines.extend(f"- {starter}" for starter in starters)
        lines.append("")
    lines.extend(["## Knowledge Files", ""])
    if files:
        lines.append("| Name | File ID | Type | Size |")
        lines.append("| --- | --- | --- | ---: |")
        for file_meta in files:
            row = [
                cell(file_meta.get("name")),
                f"`{cell(file_meta.get('file_id'))}`",
                cell(file_meta.get("type")),
                cell(file_meta.get("size")),
            ]
            lines.append("| " + " | ".join(row) + " |")
    else:
        lines.append("No file references were found in the HAR payloads.")
    lines.append("")
    return "\n".join(lines)


 def version_dir_name(payload: JsonPayload) -> str:
    """Build a directory name identifying one snapshot of a GPT.

    The result is ``version-<version>-entry-<index>``, with
    ``version-unknown`` when the gizmo has no version (None or "").
    """
    document = payload.data
    gizmo = document.get("gizmo", {}) if isinstance(document, dict) else {}
    version = gizmo.get("version")
    if version in {None, ""}:
        label = "version-unknown"
    else:
        label = f"version-{version}"
    return f"{label}-entry-{payload.index}"


 def write_har_index(payloads: list[JsonPayload], out_dir: Path) -> None:
    """Write ``har-index.json``: one summary row per JSON payload found in the HAR.

    Each row records the entry index, method, host, path, status, content
    type and whether the payload looks like a gizmo document. Response
    bodies are not included.
    """
    rows: list[dict[str, Any]] = []
    for payload in payloads:
        rows.append(
            {
                "entry": payload.index,
                "method": payload.method,
                "host": payload.host,
                "path": payload.path,
                "status": payload.status,
                "content_type": payload.content_type,
                "is_gizmo_payload": is_gizmo_payload(payload.data),
            }
        )
    write_json(out_dir / "har-index.json", rows)


 def dump_all_json(payloads: list[JsonPayload], out_dir: Path) -> None:
    """Write every JSON payload, sanitised, under ``<out_dir>/har-json-responses``.

    Each file is named from the entry index, method and path (slugified)
    and holds a "source" block describing the request next to the
    sanitised "payload".
    """
    target_dir = out_dir / "har-json-responses"
    for payload in payloads:
        stem = slugify(f"{payload.index}-{payload.method}-{payload.path}", f"entry-{payload.index}")
        source = {
            "entry": payload.index,
            "method": payload.method,
            "host": payload.host,
            "path": payload.path,
            "status": payload.status,
            "content_type": payload.content_type,
        }
        document = {"source": source, "payload": sanitize_for_disk(payload.data)}
        write_json(target_dir / f"{stem}.json", document)


 def export_gpts(
    har: dict[str, Any],
    har_path: Path,
    out_dir: Path,
    download: bool,
    dump_json: bool,
    timeout: int,
    cookie_env: str | None = None,
    authorization_env: str | None = None,
    curl_file: Path | None = None,
 ) -> dict[str, Any]:
    """Export every GPT found in a parsed HAR into ``out_dir``.

    For each gizmo id, the latest snapshot is written out as summary,
    instructions, metadata, context, files, tools and product-features
    documents, and every snapshot is stored under ``versions/``. When
    ``download`` is true and usable auth headers are available, the gizmo
    export and each referenced file are fetched too; otherwise an error entry
    explaining why is written to the corresponding report file.

    Args:
        har: Parsed HAR document.
        har_path: Path of the HAR; only recorded in the manifest.
        out_dir: Root output directory; created if missing.
        download: Whether to attempt the gizmo-export and file downloads.
        dump_json: Whether to also dump every JSON payload found in the HAR.
        timeout: Download timeout in seconds, forwarded to the download helpers.
        cookie_env: Name of an environment variable holding a Cookie header
            value; forwarded to ``merge_runtime_auth_headers``.
        authorization_env: Name of an environment variable holding an
            Authorization value; forwarded to ``merge_runtime_auth_headers``.
        curl_file: File with a copied cURL command; forwarded to
            ``merge_runtime_auth_headers``.

    Returns:
        The manifest dict. The same manifest, passed through
        ``sanitize_for_disk``, is written to ``manifest.json``.
    """

    def source_of(payload: JsonPayload) -> dict[str, Any]:
        # Request coordinates stored next to each exported payload.
        return {
            "entry": payload.index,
            "method": payload.method,
            "host": payload.host,
            "path": payload.path,
            "status": payload.status,
        }

    payloads = extract_json_payloads(har)
    grouped = group_gizmo_payloads(payloads)
    auth_entry = best_auth_entry(har)
    scheme, host = auth_origin(auth_entry)
    # The auth host is the host of that same origin, so it is resolved once.
    auth_host = host
    auth_headers = request_headers_from_entry(auth_entry) if auth_entry else {}
    auth_headers = merge_runtime_auth_headers(
        auth_headers,
        cookie_env=cookie_env,
        authorization_env=authorization_env,
        curl_file=curl_file,
    )
    auth_ready = has_auth_material(auth_headers)

    # The reason downloads cannot run is the same for every GPT, so decide it
    # once. ``None`` means the headers are usable.
    auth_error: str | None
    if not auth_headers:
        auth_error = "no ChatGPT backend auth request found in HAR"
    elif not auth_ready:
        auth_error = (
            "ChatGPT backend request found, but no Cookie or Authorization material was captured in the HAR"
        )
    else:
        auth_error = None

    out_dir.mkdir(parents=True, exist_ok=True)
    write_har_index(payloads, out_dir)
    if dump_json:
        dump_all_json(payloads, out_dir)

    manifest: dict[str, Any] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "source_har": str(har_path),
        "json_payload_count": len(payloads),
        "gpt_count": len(grouped),
        "download_attempted": download,
        "auth_material_found": auth_ready,
        "runtime_cookie_env_used": bool(cookie_env and os.environ.get(cookie_env)),
        "runtime_authorization_env_used": bool(authorization_env and os.environ.get(authorization_env)),
        "runtime_curl_file_used": bool(curl_file),
        "gpts": [],
    }

    for gizmo_id, snapshots in sorted(grouped.items()):
        latest = latest_payload(snapshots)
        files = extract_files(snapshots)
        data = latest.data if isinstance(latest.data, dict) else {}
        # ``dict.get`` only applies its default when the key is absent; a JSON
        # ``null`` comes back as None. Normalise both levels to dicts so the
        # later ``.get`` calls cannot raise AttributeError.
        gizmo = data.get("gizmo")
        if not isinstance(gizmo, dict):
            gizmo = {}
        display = gizmo.get("display")
        if not isinstance(display, dict):
            display = {}
        name = str(display.get("name") or gizmo_id)
        gpt_dir = out_dir / "gpts" / slugify(f"{name}-{gizmo_id}", gizmo_id)
        gpt_dir.mkdir(parents=True, exist_ok=True)

        write_text(gpt_dir / "summary.md", render_summary_markdown(latest, files))
        write_text(gpt_dir / "instructions.md", str(gizmo.get("instructions") or ""))
        write_json(
            gpt_dir / "metadata.json",
            {
                "source": source_of(latest),
                "payload": sanitize_for_disk(latest.data),
            },
        )
        write_json(gpt_dir / "context.json", sanitize_for_disk(compact_gizmo_context(latest, files)))
        write_json(gpt_dir / "files.json", sanitize_for_disk(files))
        write_json(gpt_dir / "tools.json", sanitize_for_disk(data.get("tools")))
        write_json(gpt_dir / "product-features.json", sanitize_for_disk(data.get("product_features")))

        versions_dir = gpt_dir / "versions"
        for snapshot in snapshots:
            write_json(
                versions_dir / version_dir_name(snapshot) / "payload.json",
                {
                    "source": source_of(snapshot),
                    "payload": sanitize_for_disk(snapshot.data),
                },
            )

        gizmo_export_report: dict[str, Any] | None = None
        if download:
            if auth_error is None:
                gizmo_export_report = download_gizmo_export(
                    gizmo_id=gizmo_id,
                    scheme=scheme,
                    host=host,
                    auth_headers=auth_headers,
                    auth_host=auth_host,
                    out_dir=gpt_dir / "source-export",
                    timeout=timeout,
                )
            else:
                gizmo_export_report = {"error": auth_error}
        # An empty list is written when no report was produced, as before.
        write_json(gpt_dir / "gizmo-export-report.json", sanitize_for_disk(gizmo_export_report or []))

        download_reports: list[dict[str, Any]] = []
        if download and files:
            if auth_error is not None:
                download_reports.append({"error": auth_error})
            else:
                for file_meta in files:
                    download_reports.append(
                        download_one_file(
                            file_meta=file_meta,
                            gizmo_id=gizmo_id,
                            scheme=scheme,
                            host=host,
                            auth_headers=auth_headers,
                            auth_host=auth_host,
                            out_dir=gpt_dir / "knowledge",
                            timeout=timeout,
                        )
                    )
        write_json(gpt_dir / "download-report.json", sanitize_for_disk(download_reports))

        manifest["gpts"].append(
            {
                "id": gizmo_id,
                "name": name,
                "directory": str(gpt_dir),
                "snapshot_count": len(snapshots),
                "latest_version": gizmo.get("version"),
                "file_count": len(files),
                "downloaded_file_count": sum(1 for item in download_reports if item.get("saved")),
                "gizmo_export_saved": bool(gizmo_export_report and gizmo_export_report.get("saved")),
            }
        )

    write_json(out_dir / "manifest.json", sanitize_for_disk(manifest))
    return manifest


 def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the exporter.

    The parser takes one positional ``har_path`` plus the options
    ``--out-dir``, ``--no-download``, ``--dump-all-json``, ``--timeout``,
    ``--cookie-env``, ``--authorization-env`` and ``--curl-file``.
    """
    # Registered in this order so the generated help lists them the same way.
    argument_table = (
        ("har_path", {"type": Path, "help": "Path to chatgpt.com.har"}),
        (
            "--out-dir",
            {
                "type": Path,
                "default": Path(DEFAULT_OUT_DIR),
                "help": f"Output directory. Default: {DEFAULT_OUT_DIR}",
            },
        ),
        (
            "--no-download",
            {
                "action": "store_true",
                "help": "Only export metadata from the HAR. Do not make network calls.",
            },
        ),
        (
            "--dump-all-json",
            {
                "action": "store_true",
                "help": "Write sanitized JSON responses for every JSON payload in the HAR.",
            },
        ),
        (
            "--timeout",
            {
                "type": int,
                "default": 60,
                "help": "Download timeout in seconds. Default: 60.",
            },
        ),
        (
            "--cookie-env",
            {
                "help": "Name of an environment variable containing a Cookie header value for ChatGPT requests.",
            },
        ),
        (
            "--authorization-env",
            {
                "help": "Name of an environment variable containing a Bearer token or full Authorization header value.",
            },
        ),
        (
            "--curl-file",
            {
                "type": Path,
                "help": "Text file containing a copied browser cURL command. Only auth headers are read.",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        description="Export Custom GPT metadata and knowledge files from a ChatGPT HAR file."
    )
    for flag, settings in argument_table:
        cli.add_argument(flag, **settings)
    return cli


 def main(argv: list[str] | None = None) -> int:
    """Run the exporter from the command line and return a process exit code.

    Parses ``argv``, loads the HAR and runs ``export_gpts``. Any exception
    raised while loading or exporting is reported on stderr and turned into
    exit code 1; otherwise a short summary is printed and 0 is returned.
    """
    options = build_parser().parse_args(argv)
    wants_download = not options.no_download

    try:
        manifest = export_gpts(
            har=load_har(options.har_path),
            har_path=options.har_path,
            out_dir=options.out_dir,
            download=wants_download,
            dump_json=options.dump_all_json,
            timeout=options.timeout,
            cookie_env=options.cookie_env,
            authorization_env=options.authorization_env,
            curl_file=options.curl_file,
        )
    except Exception as failure:
        print(f"error: {failure}", file=sys.stderr)
        return 1

    print(f"Exported {manifest['gpt_count']} GPT(s) into {options.out_dir}")
    if wants_download and not manifest.get("auth_material_found"):
        print("Download skipped: the HAR did not capture Cookie or Authorization material.")

    exported = manifest["gpts"]
    if not exported:
        # Nothing to list, so only the hint is printed.
        print("No GPT gizmo payloads were found. Export a HAR while viewing the GPT Configure page.")
        return 0

    for entry in exported:
        identity = f"- {entry['name']} | {entry['id']} | "
        counts = f"{entry['file_count']} file ref(s), {entry['downloaded_file_count']} downloaded"
        print(identity + counts)
    return 0


 # Script entry point: use main()'s return value as the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
No results found