JeremyMcCormick · September 2, 2025 23:30 · JeremyMcCormick · Sep 2, 2025
diff --git a/filter_chatgpt_conversation.py b/filter_chatgpt_conversation.py
 #!/usr/bin/env python3
 """
 Filter ChatGPT export conversations by keyword and optionally write results to
 JSON, a single HTML file, or a directory with one HTML file per conversation.

 Usage:
  python filter_chatgpt_conversation.py --json /path/to/conversations.json --keyword "ppdb" \
    --out-json filtered_conversations.json \
    --out-html filtered_conversations.html \
    --out-dir filtered_conversations_dir
 """

 from __future__ import annotations

 import argparse
 import html
 import json
 import re
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Iterable


 def _flatten_parts(parts: Iterable[Any]) -> list[str]:
    """Extract text fragments from a heterogeneous `parts` structure.

    Parameters
    ----------
    parts
        Iterable of items that may be strings or dicts (possibly nested).

    Returns
    -------
    list[str]
        Extracted plain-text fragments.
    """
    out: list[str] = []
    for p in parts or []:
        if isinstance(p, str):
            out.append(p)
        elif isinstance(p, dict):
            if "text" in p and isinstance(p["text"], str):
                out.append(p["text"])
            elif "content" in p and isinstance(p["content"], str):
                out.append(p["content"])
            elif "parts" in p and isinstance(p["parts"], list):
                out.extend(_flatten_parts(p["parts"]))
            elif "content" in p and isinstance(p["content"], list):
                out.extend(_flatten_parts(p["content"]))
    return out


 def _message_to_text(message: dict[str, Any]) -> str:
    """Extract best-effort plain text from a ChatGPT export `message` object.

    Parameters
    ----------
    message
        The message dict under `node["message"]`.

    Returns
    -------
    str
        Concatenated plain text for this message (may be empty).
    """
    if not message:
        return ""

    content = message.get("content")
    if isinstance(content, dict):
        if "parts" in content and isinstance(content["parts"], list):
            return " ".join(_flatten_parts(content["parts"]))
        if "text" in content and isinstance(content["text"], str):
            return content["text"]
        if "content" in content and isinstance(content["content"], list):
            return " ".join(_flatten_parts(content["content"]))
        if "content" in content and isinstance(content["content"], str):
            return content["content"]
    elif isinstance(content, list):
        return " ".join(_flatten_parts(content))

    parts = message.get("parts")
    if isinstance(parts, list):
        return " ".join(_flatten_parts(parts))

    return ""


 def _collect_messages(convo: dict[str, Any]) -> list[dict[str, Any]]:
    """Collect messages from a conversation into a chronological list.

    Parameters
    ----------
    convo
        A single conversation object from `conversations.json`.

    Returns
    -------
    list[dict[str, Any]]
        Each item has keys: `role` (str), `text` (str), `create_time` (float|None).
    """
    messages: list[dict[str, Any]] = []
    mapping = convo.get("mapping", {})
    if not isinstance(mapping, dict):
        return messages

    for node in mapping.values():
        if not isinstance(node, dict):
            continue
        message = node.get("message")
        if not isinstance(message, dict):
            continue
        role = (message.get("author") or {}).get("role") or ""
        text = _message_to_text(message)
        ts = message.get("create_time")
        if ts is None:
            ts = convo.get("create_time")
        messages.append({"role": str(role), "text": text, "create_time": ts})

    messages.sort(key=lambda m: (m["create_time"] is None, m["create_time"]))
    return messages


 def filter_conversations_by_project(json_path: str | Path, project_keyword: str) -> list[dict[str, Any]]:
    """Filter conversations whose text contains the given keyword (case-insensitive).

    Parameters
    ----------
    json_path
        Path to the `conversations.json` file from ChatGPT export.
    project_keyword
        Keyword or phrase to match.

    Returns
    -------
    list[dict[str, Any]]
        Conversation dicts containing the keyword anywhere in their messages.
    """
    path = Path(json_path)
    with path.open(encoding="utf-8") as f:
        data: list[dict[str, Any]] = json.load(f)

    needle = project_keyword.lower()
    results: list[dict[str, Any]] = []

    for convo in data:
        texts: list[str] = []
        for m in _collect_messages(convo):
            if m["text"]:
                texts.append(m["text"])
        full_text = " ".join(texts).lower()
        if needle in full_text:
            results.append(convo)

    return results


 def _fmt_dt(ts: float | int | None) -> str:
    """Format a UNIX timestamp as ISO 8601 UTC string.

    Parameters
    ----------
    ts
        UNIX epoch seconds, or None.

    Returns
    -------
    str
        ISO 8601 string in UTC, or empty string if None/invalid.
    """
    if ts is None:
        return ""
    try:
        return datetime.fromtimestamp(float(ts), tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")
    except Exception:
        return ""


 def _sanitize_filename(name: str, max_len: int = 80) -> str:
    """Sanitize a string for safe filesystem usage.

    Parameters
    ----------
    name
        Candidate filename (e.g., conversation title).
    max_len
        Maximum length of the resulting filename stem (before extension).

    Returns
    -------
    str
        Safe filename stem without extension.
    """
    # Collapse whitespace and strip
    s = " ".join(name.split()).strip()
    # Replace unsafe characters with underscores
    s = re.sub(r'[^\w\-\. ]+', "_", s, flags=re.UNICODE)
    # Replace spaces with underscores
    s = s.replace(" ", "_")
    # Trim length
    if len(s) > max_len:
        s = s[:max_len].rstrip("_-.")
    # Ensure non-empty
    return s or "conversation"


 def render_html(conversations: list[dict[str, Any]], keyword: str) -> str:
    """Build one standalone HTML page containing every given conversation.

    Each conversation becomes a numbered section showing its title, its
    creation time (when one can be formatted) and all of its messages with
    role, timestamp and HTML-escaped text.

    Parameters
    ----------
    conversations
        Conversation dicts from the export, already filtered.
    keyword
        The keyword that was used for filtering; shown in the page title
        and heading.

    Returns
    -------
    str
        The complete HTML document.
    """
    heading = html.escape(f"ChatGPT Conversations matching: {keyword}")
    stamp = html.escape(datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z"))
    total = len(conversations)

    doc: list[str] = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        f"<meta charset='utf-8'><title>{heading}</title>",
        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
        "<style>",
        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
        "h2{font-size:1.2rem;margin:1.5rem 0 0.5rem}",
        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
        ".convo{border:1px solid #ddd;border-radius:8px;padding:1rem;margin-bottom:1.5rem;background:#fafafa}",
        ".msg{padding:0.5rem 0;border-top:1px solid #eee}",
        ".msg:first-child{border-top:none}",
        ".role{font-weight:600}",
        ".time{color:#555;font-size:0.85rem;margin-left:0.5rem}",
        "pre{white-space:pre-wrap;word-wrap:break-word;background:#fff;border:1px solid #eee;border-radius:6px;padding:0.5rem;margin:0.25rem 0}",
        "a{color:inherit}",
        "</style>",
        "</head>",
        "<body>",
        f"<h1>{heading}</h1>",
        f"<div class='meta'>Generated: {stamp} • Total conversations: {total}</div>",
    ]

    for number, convo in enumerate(conversations, start=1):
        label = convo.get("title") or "(untitled)"
        created = _fmt_dt(convo.get("create_time"))
        doc.append("<div class='convo'>")
        doc.append(f"<h2>{number}. {html.escape(label)}</h2>")
        if created:
            doc.append(f"<div class='meta'>Created: {html.escape(created)}</div>")

        for entry in _collect_messages(convo):
            speaker = entry.get("role") or ""
            body = entry.get("text") or ""
            when = _fmt_dt(entry.get("create_time"))
            header = f"<span class='role'>{html.escape(speaker)}</span>"
            if when:
                header = header + f"<span class='time'>{html.escape(when)}</span>"
            doc.extend(
                [
                    "<div class='msg'>",
                    header,
                    f"<pre>{html.escape(body)}</pre>",
                    "</div>",
                ]
            )

        doc.append("</div>")  # closes .convo

    doc.append("</body></html>")
    return "\n".join(doc)


 def render_single_conversation_html(convo: dict[str, Any], idx: int) -> str:
    """Render a single conversation as a standalone HTML page.

    The page heading is ``"<idx>. <title>"``; a missing or empty title is
    shown as ``(untitled)``. The meta line lists the creation time only when
    one can be formatted, so no empty ``Created:`` label is emitted, and it
    always lists the export (generation) time in UTC.

    Parameters
    ----------
    convo
        Conversation dict (from export).
    idx
        1-based index used for labeling.

    Returns
    -------
    str
        Complete HTML document for the conversation.
    """
    title = convo.get("title") or "(untitled)"
    ctime = _fmt_dt(convo.get("create_time"))
    page_title = f"{idx}. {title}"
    generated = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")

    # Assemble the meta line from the pieces that actually exist.
    meta_items: list[str] = []
    if ctime:
        meta_items.append(f"Created: {html.escape(ctime)}")
    meta_items.append(f"Exported: {html.escape(generated)}")
    meta_line = " • ".join(meta_items)

    parts: list[str] = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        f"<meta charset='utf-8'><title>{html.escape(page_title)}</title>",
        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
        "<style>",
        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
        ".msg{padding:0.75rem 0;border-top:1px solid #eee}",
        ".msg:first-child{border-top:none}",
        ".role{font-weight:600}",
        ".time{color:#555;font-size:0.85rem;margin-left:0.5rem}",
        "pre{white-space:pre-wrap;word-wrap:break-word;background:#fff;border:1px solid #eee;border-radius:6px;padding:0.5rem;margin:0.25rem 0}",
        "</style>",
        "</head>",
        "<body>",
        f"<h1>{html.escape(page_title)}</h1>",
        f"<div class='meta'>{meta_line}</div>",
    ]

    for m in _collect_messages(convo):
        role = m.get("role") or ""
        text = m.get("text") or ""
        ts = _fmt_dt(m.get("create_time"))
        head = f"<span class='role'>{html.escape(role)}</span>"
        if ts:
            head += f"<span class='time'>{html.escape(ts)}</span>"
        parts.append("<div class='msg'>")
        parts.append(head)
        parts.append(f"<pre>{html.escape(text)}</pre>")
        parts.append("</div>")

    parts.append("</body></html>")
    return "\n".join(parts)


 def write_per_conversation_html(conversations: list[dict[str, Any]], out_dir: Path, make_index: bool = True) -> None:
    """Write each conversation to its own HTML file, plus an optional index.

    Files are named ``NNNN_<sanitized title>.html`` where ``NNNN`` is the
    1-based position in `conversations`. The directory (and any missing
    parents) is created first.

    Parameters
    ----------
    conversations
        Filtered list of conversations to write.
    out_dir
        Directory to create if needed and write the files into.
    make_index
        Whether to also write an `index.html` table linking to every
        conversation file.

    Returns
    -------
    None
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    entries: list[tuple[str, str, str]] = []  # (filename, title, created)
    for number, convo in enumerate(conversations, start=1):
        label = convo.get("title") or "(untitled)"
        when = _fmt_dt(convo.get("create_time"))
        target_name = f"{number:04d}_{_sanitize_filename(label)}" + ".html"
        page = render_single_conversation_html(convo, number)
        (out_dir / target_name).write_text(page, encoding="utf-8")
        entries.append((target_name, label, when))

    if not make_index:
        return

    heading = html.escape(f"{len(conversations)} conversations")
    stamp = html.escape(datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z"))
    lines: list[str] = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        f"<meta charset='utf-8'><title>{heading}</title>",
        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
        "<style>",
        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
        "table{border-collapse:collapse;width:100%}",
        "th,td{border:1px solid #ddd;padding:0.5rem;text-align:left}",
        "tr:nth-child(even){background:#fafafa}",
        "a{color:inherit}",
        "</style>",
        "</head>",
        "<body>",
        f"<h1>{heading}</h1>",
        f"<div class='meta'>Generated: {stamp}</div>",
        "<table>",
        "<thead><tr><th>#</th><th>Title</th><th>Created (UTC)</th></tr></thead>",
        "<tbody>",
    ]
    # One table row per written file, numbered in write order.
    lines.extend(
        "<tr>"
        f"<td>{row}</td>"
        f"<td><a href='{html.escape(fname)}'>{html.escape(label)}</a></td>"
        f"<td>{html.escape(when)}</td>"
        "</tr>"
        for row, (fname, label, when) in enumerate(entries, start=1)
    )
    lines += ["</tbody></table>", "</body></html>"]
    (out_dir / "index.html").write_text("\n".join(lines), encoding="utf-8")


 def main() -> None:
    """Command-line entry point.

    Parses the arguments, filters the export by keyword, prints a summary of
    the matches, and writes whichever of the JSON, combined-HTML and
    per-conversation outputs were requested.
    """
    cli = argparse.ArgumentParser(description="Filter ChatGPT conversations by keyword.")
    cli.add_argument("--json", required=True, help="Path to conversations.json")
    cli.add_argument("--keyword", required=True, help="Keyword or phrase to match (case-insensitive)")
    cli.add_argument("--out-json", help="Optional path to write filtered JSON")
    cli.add_argument("--out-html", help="Optional path to write a single combined HTML")
    cli.add_argument("--out-dir", help="Optional directory to write one HTML per conversation (plus index.html)")
    opts = cli.parse_args()

    found = filter_conversations_by_project(opts.json, opts.keyword)

    print(f"Found {len(found)} conversations mentioning '{opts.keyword}'")
    for convo in found:
        label = convo.get("title") or "(untitled)"
        when = _fmt_dt(convo.get("create_time"))
        print(f"- {label}  [{when}]")

    json_target, html_target, dir_target = opts.out_json, opts.out_html, opts.out_dir

    if json_target:
        payload = json.dumps(found, indent=2, ensure_ascii=False)
        Path(json_target).write_text(payload, encoding="utf-8")
        print(f"Wrote JSON: {json_target}")

    if html_target:
        Path(html_target).write_text(render_html(found, opts.keyword), encoding="utf-8")
        print(f"Wrote HTML: {html_target}")

    if dir_target:
        destination = Path(dir_target)
        write_per_conversation_html(found, destination, make_index=True)
        print(f"Wrote {len(found)} conversation files to: {destination.resolve()}")


 # Run the CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Filter ChatGPT export conversations by keyword and optionally write results to
	JSON, a single HTML file, or a directory with one HTML file per conversation.

	Usage:
	python filter_chatgpt_conversation.py --json /path/to/conversations.json --keyword "ppdb" \
	--out-json filtered_conversations.json \
	--out-html filtered_conversations.html \
	--out-dir filtered_conversations_dir
	"""

	from __future__ import annotations

	import argparse
	import html
	import json
	import re
	from datetime import datetime, timezone
	from pathlib import Path
	from typing import Any, Iterable


	def _flatten_parts(parts: Iterable[Any]) -> list[str]:
	"""Extract text fragments from a heterogeneous `parts` structure.

	Parameters
	----------
	parts
	Iterable of items that may be strings or dicts (possibly nested).

	Returns
	-------
	list[str]
	Extracted plain-text fragments.
	"""
	out: list[str] = []
	for p in parts or []:
	if isinstance(p, str):
	out.append(p)
	elif isinstance(p, dict):
	if "text" in p and isinstance(p["text"], str):
	out.append(p["text"])
	elif "content" in p and isinstance(p["content"], str):
	out.append(p["content"])
	elif "parts" in p and isinstance(p["parts"], list):
	out.extend(_flatten_parts(p["parts"]))
	elif "content" in p and isinstance(p["content"], list):
	out.extend(_flatten_parts(p["content"]))
	return out


	def _message_to_text(message: dict[str, Any]) -> str:
	"""Extract best-effort plain text from a ChatGPT export `message` object.

	Parameters
	----------
	message
	The message dict under `node["message"]`.

	Returns
	-------
	str
	Concatenated plain text for this message (may be empty).
	"""
	if not message:
	return ""

	content = message.get("content")
	if isinstance(content, dict):
	if "parts" in content and isinstance(content["parts"], list):
	return " ".join(_flatten_parts(content["parts"]))
	if "text" in content and isinstance(content["text"], str):
	return content["text"]
	if "content" in content and isinstance(content["content"], list):
	return " ".join(_flatten_parts(content["content"]))
	if "content" in content and isinstance(content["content"], str):
	return content["content"]
	elif isinstance(content, list):
	return " ".join(_flatten_parts(content))

	parts = message.get("parts")
	if isinstance(parts, list):
	return " ".join(_flatten_parts(parts))

	return ""


	def _collect_messages(convo: dict[str, Any]) -> list[dict[str, Any]]:
	"""Collect messages from a conversation into a chronological list.

	Parameters
	----------
	convo
	A single conversation object from `conversations.json`.

	Returns
	-------
	list[dict[str, Any]]
	Each item has keys: `role` (str), `text` (str), `create_time` (float\|None).
	"""
	messages: list[dict[str, Any]] = []
	mapping = convo.get("mapping", {})
	if not isinstance(mapping, dict):
	return messages

	for node in mapping.values():
	if not isinstance(node, dict):
	continue
	message = node.get("message")
	if not isinstance(message, dict):
	continue
	role = (message.get("author") or {}).get("role") or ""
	text = _message_to_text(message)
	ts = message.get("create_time")
	if ts is None:
	ts = convo.get("create_time")
	messages.append({"role": str(role), "text": text, "create_time": ts})

	messages.sort(key=lambda m: (m["create_time"] is None, m["create_time"]))
	return messages


	def filter_conversations_by_project(json_path: str \| Path, project_keyword: str) -> list[dict[str, Any]]:
	"""Filter conversations whose text contains the given keyword (case-insensitive).

	Parameters
	----------
	json_path
	Path to the `conversations.json` file from ChatGPT export.
	project_keyword
	Keyword or phrase to match.

	Returns
	-------
	list[dict[str, Any]]
	Conversation dicts containing the keyword anywhere in their messages.
	"""
	path = Path(json_path)
	with path.open(encoding="utf-8") as f:
	data: list[dict[str, Any]] = json.load(f)

	needle = project_keyword.lower()
	results: list[dict[str, Any]] = []

	for convo in data:
	texts: list[str] = []
	for m in _collect_messages(convo):
	if m["text"]:
	texts.append(m["text"])
	full_text = " ".join(texts).lower()
	if needle in full_text:
	results.append(convo)

	return results


	def _fmt_dt(ts: float \| int \| None) -> str:
	"""Format a UNIX timestamp as ISO 8601 UTC string.

	Parameters
	----------
	ts
	UNIX epoch seconds, or None.

	Returns
	-------
	str
	ISO 8601 string in UTC, or empty string if None/invalid.
	"""
	if ts is None:
	return ""
	try:
	return datetime.fromtimestamp(float(ts), tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")
	except Exception:
	return ""


	def _sanitize_filename(name: str, max_len: int = 80) -> str:
	"""Sanitize a string for safe filesystem usage.

	Parameters
	----------
	name
	Candidate filename (e.g., conversation title).
	max_len
	Maximum length of the resulting filename stem (before extension).

	Returns
	-------
	str
	Safe filename stem without extension.
	"""
	# Collapse whitespace and strip
	s = " ".join(name.split()).strip()
	# Replace unsafe characters with underscores
	s = re.sub(r'[^\w\-\. ]+', "_", s, flags=re.UNICODE)
	# Replace spaces with underscores
	s = s.replace(" ", "_")
	# Trim length
	if len(s) > max_len:
	s = s[:max_len].rstrip("_-.")
	# Ensure non-empty
	return s or "conversation"


	def render_html(conversations: list[dict[str, Any]], keyword: str) -> str:
	    """Build one standalone HTML page containing every given conversation.

	    Each conversation becomes a numbered section showing its title, its
	    creation time (when one can be formatted) and all of its messages with
	    role, timestamp and HTML-escaped text.

	    Parameters
	    ----------
	    conversations
	        Conversation dicts from the export, already filtered.
	    keyword
	        The keyword that was used for filtering; shown in the page title
	        and heading.

	    Returns
	    -------
	    str
	        The complete HTML document.
	    """
	    heading = html.escape(f"ChatGPT Conversations matching: {keyword}")
	    stamp = html.escape(datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z"))
	    total = len(conversations)

	    doc: list[str] = [
	        "<!DOCTYPE html>",
	        "<html lang='en'>",
	        "<head>",
	        f"<meta charset='utf-8'><title>{heading}</title>",
	        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
	        "<style>",
	        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
	        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
	        "h2{font-size:1.2rem;margin:1.5rem 0 0.5rem}",
	        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
	        ".convo{border:1px solid #ddd;border-radius:8px;padding:1rem;margin-bottom:1.5rem;background:#fafafa}",
	        ".msg{padding:0.5rem 0;border-top:1px solid #eee}",
	        ".msg:first-child{border-top:none}",
	        ".role{font-weight:600}",
	        ".time{color:#555;font-size:0.85rem;margin-left:0.5rem}",
	        "pre{white-space:pre-wrap;word-wrap:break-word;background:#fff;border:1px solid #eee;border-radius:6px;padding:0.5rem;margin:0.25rem 0}",
	        "a{color:inherit}",
	        "</style>",
	        "</head>",
	        "<body>",
	        f"<h1>{heading}</h1>",
	        f"<div class='meta'>Generated: {stamp} • Total conversations: {total}</div>",
	    ]

	    for number, convo in enumerate(conversations, start=1):
	        label = convo.get("title") or "(untitled)"
	        created = _fmt_dt(convo.get("create_time"))
	        doc.append("<div class='convo'>")
	        doc.append(f"<h2>{number}. {html.escape(label)}</h2>")
	        if created:
	            doc.append(f"<div class='meta'>Created: {html.escape(created)}</div>")

	        for entry in _collect_messages(convo):
	            speaker = entry.get("role") or ""
	            body = entry.get("text") or ""
	            when = _fmt_dt(entry.get("create_time"))
	            header = f"<span class='role'>{html.escape(speaker)}</span>"
	            if when:
	                header = header + f"<span class='time'>{html.escape(when)}</span>"
	            doc.extend(
	                [
	                    "<div class='msg'>",
	                    header,
	                    f"<pre>{html.escape(body)}</pre>",
	                    "</div>",
	                ]
	            )

	        doc.append("</div>")  # closes .convo

	    doc.append("</body></html>")
	    return "\n".join(doc)


	def render_single_conversation_html(convo: dict[str, Any], idx: int) -> str:
	    """Render a single conversation as a standalone HTML page.

	    The page heading is ``"<idx>. <title>"``; a missing or empty title is
	    shown as ``(untitled)``. The meta line lists the creation time only when
	    one can be formatted, so no empty ``Created:`` label is emitted, and it
	    always lists the export (generation) time in UTC.

	    Parameters
	    ----------
	    convo
	        Conversation dict (from export).
	    idx
	        1-based index used for labeling.

	    Returns
	    -------
	    str
	        Complete HTML document for the conversation.
	    """
	    title = convo.get("title") or "(untitled)"
	    ctime = _fmt_dt(convo.get("create_time"))
	    page_title = f"{idx}. {title}"
	    generated = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")

	    # Assemble the meta line from the pieces that actually exist.
	    meta_items: list[str] = []
	    if ctime:
	        meta_items.append(f"Created: {html.escape(ctime)}")
	    meta_items.append(f"Exported: {html.escape(generated)}")
	    meta_line = " • ".join(meta_items)

	    parts: list[str] = [
	        "<!DOCTYPE html>",
	        "<html lang='en'>",
	        "<head>",
	        f"<meta charset='utf-8'><title>{html.escape(page_title)}</title>",
	        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
	        "<style>",
	        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
	        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
	        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
	        ".msg{padding:0.75rem 0;border-top:1px solid #eee}",
	        ".msg:first-child{border-top:none}",
	        ".role{font-weight:600}",
	        ".time{color:#555;font-size:0.85rem;margin-left:0.5rem}",
	        "pre{white-space:pre-wrap;word-wrap:break-word;background:#fff;border:1px solid #eee;border-radius:6px;padding:0.5rem;margin:0.25rem 0}",
	        "</style>",
	        "</head>",
	        "<body>",
	        f"<h1>{html.escape(page_title)}</h1>",
	        f"<div class='meta'>{meta_line}</div>",
	    ]

	    for m in _collect_messages(convo):
	        role = m.get("role") or ""
	        text = m.get("text") or ""
	        ts = _fmt_dt(m.get("create_time"))
	        head = f"<span class='role'>{html.escape(role)}</span>"
	        if ts:
	            head += f"<span class='time'>{html.escape(ts)}</span>"
	        parts.append("<div class='msg'>")
	        parts.append(head)
	        parts.append(f"<pre>{html.escape(text)}</pre>")
	        parts.append("</div>")

	    parts.append("</body></html>")
	    return "\n".join(parts)


	def write_per_conversation_html(conversations: list[dict[str, Any]], out_dir: Path, make_index: bool = True) -> None:
	    """Write each conversation to its own HTML file, plus an optional index.

	    Files are named ``NNNN_<sanitized title>.html`` where ``NNNN`` is the
	    1-based position in `conversations`. The directory (and any missing
	    parents) is created first.

	    Parameters
	    ----------
	    conversations
	        Filtered list of conversations to write.
	    out_dir
	        Directory to create if needed and write the files into.
	    make_index
	        Whether to also write an `index.html` table linking to every
	        conversation file.

	    Returns
	    -------
	    None
	    """
	    out_dir.mkdir(parents=True, exist_ok=True)

	    entries: list[tuple[str, str, str]] = []  # (filename, title, created)
	    for number, convo in enumerate(conversations, start=1):
	        label = convo.get("title") or "(untitled)"
	        when = _fmt_dt(convo.get("create_time"))
	        target_name = f"{number:04d}_{_sanitize_filename(label)}" + ".html"
	        page = render_single_conversation_html(convo, number)
	        (out_dir / target_name).write_text(page, encoding="utf-8")
	        entries.append((target_name, label, when))

	    if not make_index:
	        return

	    heading = html.escape(f"{len(conversations)} conversations")
	    stamp = html.escape(datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z"))
	    lines: list[str] = [
	        "<!DOCTYPE html>",
	        "<html lang='en'>",
	        "<head>",
	        f"<meta charset='utf-8'><title>{heading}</title>",
	        "<meta name='viewport' content='width=device-width,initial-scale=1'>",
	        "<style>",
	        "body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif;line-height:1.5;margin:2rem;}",
	        "h1{font-size:1.6rem;margin-bottom:0.25rem}",
	        ".meta{color:#555;font-size:0.9rem;margin-bottom:1rem}",
	        "table{border-collapse:collapse;width:100%}",
	        "th,td{border:1px solid #ddd;padding:0.5rem;text-align:left}",
	        "tr:nth-child(even){background:#fafafa}",
	        "a{color:inherit}",
	        "</style>",
	        "</head>",
	        "<body>",
	        f"<h1>{heading}</h1>",
	        f"<div class='meta'>Generated: {stamp}</div>",
	        "<table>",
	        "<thead><tr><th>#</th><th>Title</th><th>Created (UTC)</th></tr></thead>",
	        "<tbody>",
	    ]
	    # One table row per written file, numbered in write order.
	    lines.extend(
	        "<tr>"
	        f"<td>{row}</td>"
	        f"<td><a href='{html.escape(fname)}'>{html.escape(label)}</a></td>"
	        f"<td>{html.escape(when)}</td>"
	        "</tr>"
	        for row, (fname, label, when) in enumerate(entries, start=1)
	    )
	    lines += ["</tbody></table>", "</body></html>"]
	    (out_dir / "index.html").write_text("\n".join(lines), encoding="utf-8")


	def main() -> None:
	    """Command-line entry point.

	    Parses the arguments, filters the export by keyword, prints a summary of
	    the matches, and writes whichever of the JSON, combined-HTML and
	    per-conversation outputs were requested.
	    """
	    cli = argparse.ArgumentParser(description="Filter ChatGPT conversations by keyword.")
	    cli.add_argument("--json", required=True, help="Path to conversations.json")
	    cli.add_argument("--keyword", required=True, help="Keyword or phrase to match (case-insensitive)")
	    cli.add_argument("--out-json", help="Optional path to write filtered JSON")
	    cli.add_argument("--out-html", help="Optional path to write a single combined HTML")
	    cli.add_argument("--out-dir", help="Optional directory to write one HTML per conversation (plus index.html)")
	    opts = cli.parse_args()

	    found = filter_conversations_by_project(opts.json, opts.keyword)

	    print(f"Found {len(found)} conversations mentioning '{opts.keyword}'")
	    for convo in found:
	        label = convo.get("title") or "(untitled)"
	        when = _fmt_dt(convo.get("create_time"))
	        print(f"- {label}  [{when}]")

	    json_target, html_target, dir_target = opts.out_json, opts.out_html, opts.out_dir

	    if json_target:
	        payload = json.dumps(found, indent=2, ensure_ascii=False)
	        Path(json_target).write_text(payload, encoding="utf-8")
	        print(f"Wrote JSON: {json_target}")

	    if html_target:
	        Path(html_target).write_text(render_html(found, opts.keyword), encoding="utf-8")
	        print(f"Wrote HTML: {html_target}")

	    if dir_target:
	        destination = Path(dir_target)
	        write_per_conversation_html(found, destination, make_index=True)
	        print(f"Wrote {len(found)} conversation files to: {destination.resolve()}")


	# Run the CLI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()
No results found