Last active
February 18, 2026 17:06
-
-
Save itstauq/bcdd75a8030a598a44413fa06bbf649c to your computer and use it in GitHub Desktop.
Convert ChatGPT and Claude Code conversation exports to clean markdown for importing into OpenClaw memory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Convert Claude Code and ChatGPT conversation exports to clean markdown. | |
| Usage: | |
| python convert-ai-history.py --claude ~/.claude/projects --chatgpt ~/Downloads/conversations.json --output ~/openclaw/memory | |
| # Claude only | |
| python convert-ai-history.py --claude ~/.claude/projects --output ~/openclaw/memory | |
| # ChatGPT only | |
| python convert-ai-history.py --chatgpt ~/Downloads/conversations.json --output ~/openclaw/memory | |
| # Dry run (preview without writing) | |
| python convert-ai-history.py --claude ~/.claude/projects --output ~/openclaw/memory --dry-run | |
| Outputs: | |
| {output}/claude-code-history/*.md | |
| {output}/chatgpt-history/*.md | |
| """ | |
| import argparse | |
| import getpass | |
| import json | |
| import re | |
| import sys | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
def slugify(text, max_len=60):
    """Convert text to a filename-safe slug.

    The text is lowercased, every character that is not a word character,
    whitespace or a hyphen is dropped, and runs of whitespace, underscores
    and hyphens are collapsed into a single hyphen.

    Args:
        text: Title to convert. ``None`` (or any other falsy value) is
            treated as an empty title instead of raising.
        max_len: Maximum length of the returned slug.

    Returns:
        The slug, or ``'untitled'`` when nothing usable remains.
    """
    text = str(text or '').lower().strip()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[\s_-]+', '-', text).strip('-')
    # Truncation can cut right after a separator; strip again so the slug
    # never ends in a dangling hyphen (e.g. "foo-.md").
    return text[:max_len].strip('-') or 'untitled'
def _summarize_tool_use(block):
    """Return a one-line markdown summary of a ``tool_use`` content block."""
    name = block.get('name', 'tool')
    inp = block.get('input')
    if not isinstance(inp, dict):
        # A missing or null input must not break the summary.
        inp = {}
    if name in ('Edit', 'Write'):
        fp = inp.get('file_path', inp.get('path', '?'))
        return f"*[Used {name} on `{fp}`]*"
    if name == 'Bash':
        cmd = str(inp.get('command', '?'))
        if len(cmd) > 120:
            cmd = cmd[:120] + '...'
        return f"*[Ran `{cmd}`]*"
    if name == 'Read':
        fp = inp.get('file_path', inp.get('path', '?'))
        return f"*[Read `{fp}`]*"
    return f"*[Used {name}]*"


def extract_text_from_content(content):
    """Extract readable text from message content.

    Args:
        content: Either a plain string, a list of blocks (strings or dicts
            with a ``type`` key), ``None``, or any other value.

    Returns:
        A string. Strings are returned unchanged. Lists are flattened:
        ``text`` blocks contribute their text, ``tool_use`` blocks a short
        one-line summary, and every other block type (``tool_result``,
        ``thinking``, ...) is skipped; non-blank pieces are joined with a
        blank line. ``None`` yields ``''`` rather than the literal text
        ``"None"``. Any other value is passed through ``str()``.
    """
    if content is None:
        return ''
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    texts = []
    for block in content:
        if isinstance(block, str):
            texts.append(block)
        elif isinstance(block, dict):
            kind = block.get('type')
            if kind == 'text':
                text = block.get('text')
                # Ignore null / non-string text instead of crashing on .strip().
                if isinstance(text, str):
                    texts.append(text)
            elif kind == 'tool_use':
                texts.append(_summarize_tool_use(block))
            # tool_result blocks are too verbose and thinking blocks are
            # internal; both are intentionally dropped.
    return '\n\n'.join(t for t in texts if t.strip())
def clean_project_path(dirname):
    """Remove username-specific path prefixes from directory names.

    Claude Code encodes a project's path into its directory name by
    replacing separators with hyphens, e.g. ``-Users-alice-Projects-foo``.
    This strips a leading ``-Users-<name>-`` or ``-home-<name>-`` prefix
    (case-insensitively) and turns the remaining hyphens back into slashes,
    giving ``Projects/foo``.

    Args:
        dirname: Encoded directory name.

    Returns:
        The cleaned, slash-separated project path.
    """
    try:
        username = getpass.getuser()
    except (OSError, KeyError, ImportError):
        # No resolvable login name; the generic patterns below still apply.
        username = ''

    patterns = []
    if username:
        # Escape the name: it is data, not a regex. Without this a name
        # containing metacharacters raises re.error or matches wrongly.
        user = re.escape(username)
        patterns += [rf'^-Users-{user}-?', rf'^-home-{user}-?']
    # Fallback for any other username.
    patterns += [r'^-Users-[^-]+-?', r'^-home-[^-]+-?']

    name = dirname
    for pattern in patterns:
        name = re.sub(pattern, '', name, flags=re.IGNORECASE)
    return name.replace('-', '/')
| # ─── Claude Code JSONL ─────────────────────────────────────────────── | |
def convert_claude_session(jsonl_path):
    """Convert a single Claude Code JSONL session to markdown.

    Args:
        jsonl_path: Path to a session file holding one JSON object per line.
            Blank lines, lines that are not valid JSON and JSON values that
            are not objects are ignored.

    Returns:
        A ``(filename, markdown)`` tuple, or ``(None, None)`` when the file
        contains no usable entries. The filename is
        ``<YYYY-MM-DD>-<title-slug>.md`` (the date prefix is omitted when no
        timestamp is available).
    """
    entries = []
    # JSON is UTF-8; do not depend on the platform's locale encoding.
    with open(jsonl_path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue  # tolerate truncated / corrupt lines
            if isinstance(entry, dict):
                entries.append(entry)
    if not entries:
        return None, None

    # Extract metadata
    summaries = [
        e['summary'] for e in entries
        if e.get('type') == 'summary' and isinstance(e.get('summary'), str)
    ]
    title = summaries[0] if summaries else 'Untitled Session'

    # The first non-empty value seen on a user entry wins for every field.
    project = None
    session_id = None
    timestamp = None
    git_branch = None
    for e in entries:
        if e.get('type') != 'user':
            continue
        cwd = e.get('cwd') or ''
        if not project and cwd:
            # Explicit guard instead of `a or b if c else None`, which parses
            # as `(a or b) if c else None` and wiped an already-found project
            # whenever a later entry lacked `cwd`.
            project = cwd.replace('\\', '/').rstrip('/').split('/')[-1] or None
        session_id = session_id or e.get('sessionId')
        git_branch = git_branch or e.get('gitBranch')
        timestamp = timestamp or e.get('timestamp')

    # Parse date
    date_str = 'Unknown'
    if isinstance(timestamp, str) and timestamp:
        try:
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except ValueError:
            # Not ISO-8601: fall back to the leading date-like characters.
            date_str = timestamp[:10]
        else:
            if dt.tzinfo is not None:
                # The label says UTC, so make sure the value really is UTC.
                dt = dt.astimezone(timezone.utc)
            date_str = dt.strftime('%Y-%m-%d %H:%M UTC')

    # Build markdown
    lines = [f"# {title}\n"]
    meta = []
    if project:
        meta.append(f"**Project:** {project}")
    meta.append(f"**Date:** {date_str}")
    if git_branch:
        meta.append(f"**Branch:** {git_branch}")
    if session_id:
        meta.append(f"**Session:** `{session_id}`")
    lines.append(' | '.join(meta) + '\n')
    if len(summaries) > 1:
        lines.append("**Topics:** " + ' → '.join(summaries) + '\n')
    lines.append('---\n')

    # Conversation turns
    headings = {'user': 'User', 'assistant': 'Assistant'}
    for e in entries:
        heading = headings.get(e.get('type'))
        if heading is None:
            continue
        message = e.get('message')
        # A null message or null content is treated as an empty turn.
        content = (message.get('content') or '') if isinstance(message, dict) else ''
        text = extract_text_from_content(content).strip()
        if text:
            lines.append(f"## {heading}\n\n{text}\n")
    md = '\n'.join(lines)

    # Generate filename
    date_prefix = date_str[:10] if date_str != 'Unknown' else ''
    slug = slugify(title)
    filename = f"{date_prefix}-{slug}.md" if date_prefix else f"{slug}.md"
    return filename, md
def convert_claude_dump(dump_dir, output_dir, dry_run=False):
    """Convert all Claude Code sessions from a dump directory.

    Every sub-directory of ``dump_dir`` is treated as one project and every
    ``*.jsonl`` file inside it as one session.

    Args:
        dump_dir: Claude Code projects directory (``~`` is expanded).
        output_dir: Directory the markdown files are written to; created
            when missing unless ``dry_run`` is set.
        dry_run: When true, only print the names that would be written.

    Returns:
        A ``(converted, skipped)`` tuple of counts.
    """
    dump_path = Path(dump_dir).expanduser()
    out_path = Path(output_dir).expanduser()
    # is_dir() also rejects a path that exists but is a regular file, which
    # would otherwise crash in iterdir().
    if not dump_path.is_dir():
        print(f" ✗ Claude dump not found: {dump_path}")
        return 0, 0
    if not dry_run:
        out_path.mkdir(parents=True, exist_ok=True)

    converted = 0
    skipped = 0
    # Names handed out during this run. In dry-run mode nothing reaches the
    # disk, so exists() alone cannot detect collisions between sessions.
    claimed = set()
    for project_dir in sorted(dump_path.iterdir()):
        if not project_dir.is_dir():
            continue
        # Clean up project name (remove username-specific paths) and keep
        # only its last component as a short prefix for uniqueness.
        project_name = clean_project_path(project_dir.name)
        project_slug = slugify(project_name.split('/')[-1], 20)

        for jsonl_file in sorted(project_dir.glob('*.jsonl')):
            try:
                filename, md = convert_claude_session(jsonl_file)
            except (OSError, UnicodeDecodeError) as exc:
                # One unreadable session must not abort the whole import.
                print(f" ✗ Could not read {jsonl_file}: {exc}")
                skipped += 1
                continue
            if not filename or not md:
                skipped += 1
                continue

            final_name = f"{project_slug}--{filename}"
            stem = final_name.rsplit('.', 1)[0]
            out_file = out_path / final_name
            # Deduplicate
            counter = 1
            while out_file.name in claimed or out_file.exists():
                out_file = out_path / f"{stem}-{counter}.md"
                counter += 1
            claimed.add(out_file.name)

            if dry_run:
                print(f" [dry-run] {out_file.name}")
            else:
                # Explicit UTF-8: the markdown contains non-ASCII characters
                # that a legacy locale encoding may not be able to encode.
                out_file.write_text(md, encoding='utf-8')
            converted += 1
    return converted, skipped
| # ─── ChatGPT JSON ──────────────────────────────────────────────────── | |
def convert_chatgpt_conversation(convo):
    """Convert a single ChatGPT conversation to markdown.

    Args:
        convo: One conversation dict from the export. The keys read are
            ``title``, ``create_time``, ``default_model_slug``, ``id`` /
            ``conversation_id``, ``mapping`` and ``current_node``.

    Returns:
        A ``(filename, markdown)`` tuple. Only the branch that ends at
        ``current_node`` is rendered; system messages and messages without
        text are omitted.
    """
    # `.get(key, default)` does not help when the key is present but null,
    # so fall back with `or` instead.
    title = convo.get('title') or 'Untitled'
    create_time = convo.get('create_time')
    model = convo.get('default_model_slug') or ''
    convo_id = convo.get('id') or convo.get('conversation_id') or ''

    # Parse date
    date_str = 'Unknown'
    if create_time:
        try:
            dt = datetime.fromtimestamp(float(create_time), tz=timezone.utc)
            date_str = dt.strftime('%Y-%m-%d %H:%M UTC')
        except (TypeError, ValueError, OverflowError, OSError):
            pass  # unusable timestamp: keep 'Unknown'

    # Build conversation tree → linear order by walking from current_node
    # back to the root through the parent links.
    mapping = convo.get('mapping') or {}
    path = []
    visited = set()
    node_id = convo.get('current_node')
    while node_id and node_id in mapping and node_id not in visited:
        visited.add(node_id)  # guards against a cycle in the parent links
        path.append(node_id)
        node_id = (mapping[node_id] or {}).get('parent')
    path.reverse()

    # Build markdown
    lines = [f"# {title}\n"]
    meta = [f"**Date:** {date_str}"]
    if model:
        meta.append(f"**Model:** {model}")
    if convo_id:
        meta.append(f"**ID:** `{convo_id}`")
    lines.append(' | '.join(meta) + '\n')
    lines.append('---\n')

    for nid in path:
        msg = (mapping[nid] or {}).get('message')
        if not msg:
            continue
        role = (msg.get('author') or {}).get('role') or ''
        if role == 'system':
            continue
        content = msg.get('content')
        parts = (content.get('parts') or []) if isinstance(content, dict) else []

        text_parts = []
        for part in parts:
            if isinstance(part, str):
                if part.strip():
                    text_parts.append(part)
            elif isinstance(part, dict):
                # Could be image, code, etc; only text parts are kept.
                part_text = part.get('text')
                if part.get('content_type', '') == 'text' and isinstance(part_text, str):
                    text_parts.append(part_text)
        text = '\n\n'.join(text_parts).strip()
        if not text:
            continue
        # 'user' -> 'User', 'assistant' -> 'Assistant', 'tool' -> 'Tool', ...
        role_label = role.title() or 'Unknown'
        lines.append(f"## {role_label}\n\n{text}\n")
    md = '\n'.join(lines)

    date_prefix = date_str[:10] if date_str != 'Unknown' else ''
    slug = slugify(title)
    filename = f"{date_prefix}-{slug}.md" if date_prefix else f"{slug}.md"
    return filename, md
def convert_chatgpt_dump(json_path, output_dir, dry_run=False):
    """Convert all ChatGPT conversations from export JSON.

    Args:
        json_path: Path to the exported ``conversations.json`` (``~`` is
            expanded). The file is expected to hold a JSON list of
            conversation objects.
        output_dir: Directory the markdown files are written to; created
            when missing unless ``dry_run`` is set.
        dry_run: When true, only print the names that would be written.

    Returns:
        A ``(converted, skipped)`` tuple of counts.

    Raises:
        json.JSONDecodeError: If the export is not valid JSON.
    """
    json_path = Path(json_path).expanduser()
    out_path = Path(output_dir).expanduser()
    if not json_path.is_file():
        print(f" ✗ ChatGPT export not found: {json_path}")
        return 0, 0
    if not dry_run:
        out_path.mkdir(parents=True, exist_ok=True)

    # JSON is UTF-8; do not depend on the platform's locale encoding.
    with open(json_path, encoding='utf-8') as f:
        convos = json.load(f)
    if not isinstance(convos, list):
        print(f" ✗ Unexpected ChatGPT export format (expected a list): {json_path}")
        return 0, 0

    converted = 0
    skipped = 0
    # Names handed out during this run. In dry-run mode nothing reaches the
    # disk, so exists() alone cannot detect collisions between conversations.
    claimed = set()
    for convo in convos:
        if not isinstance(convo, dict):
            skipped += 1
            continue
        filename, md = convert_chatgpt_conversation(convo)
        # Heuristic: a conversation with at most one '##' in its markdown has
        # at most one turn and is not worth keeping.
        # NOTE(review): this also counts '##' inside message text.
        if not filename or not md or md.count('##') <= 1:
            skipped += 1
            continue

        stem = filename.rsplit('.', 1)[0]
        out_file = out_path / filename
        counter = 1
        while out_file.name in claimed or out_file.exists():
            out_file = out_path / f"{stem}-{counter}.md"
            counter += 1
        claimed.add(out_file.name)

        if dry_run:
            print(f" [dry-run] {out_file.name}")
        else:
            # Explicit UTF-8: conversations routinely contain non-ASCII text.
            out_file.write_text(md, encoding='utf-8')
        converted += 1
    return converted, skipped
| # ─── Main ───────────────────────────────────────────────────────────── | |
def main():
    """Command-line entry point: parse arguments and run each requested conversion."""
    examples = """
Examples:
# Convert both
%(prog)s --claude ~/.claude/projects --chatgpt ~/Downloads/conversations.json -o ~/openclaw/memory
# Claude only
%(prog)s --claude ~/.claude/projects -o ~/openclaw/memory
# ChatGPT only
%(prog)s --chatgpt ~/Downloads/conversations.json -o ~/openclaw/memory
# Preview without writing
%(prog)s --claude ~/.claude/projects -o ~/openclaw/memory --dry-run
"""
    parser = argparse.ArgumentParser(
        description='Convert Claude Code and ChatGPT exports to markdown.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument(
        '--claude',
        metavar='DIR',
        help='Path to Claude Code projects directory (e.g., ~/.claude/projects)',
    )
    parser.add_argument(
        '--chatgpt',
        metavar='FILE',
        help='Path to ChatGPT conversations.json export',
    )
    parser.add_argument(
        '-o', '--output',
        metavar='DIR',
        required=True,
        help='Output directory (will create claude-code-history/ and chatgpt-history/ subdirs)',
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview files that would be created without writing',
    )
    args = parser.parse_args()

    if not (args.claude or args.chatgpt):
        parser.error('At least one of --claude or --chatgpt is required')

    output_base = Path(args.output).expanduser()

    # One row per source: (input path, banner, output sub-directory, converter).
    jobs = (
        (args.claude, "Converting Claude Code sessions...",
         'claude-code-history', convert_claude_dump),
        (args.chatgpt, "Converting ChatGPT conversations...",
         'chatgpt-history', convert_chatgpt_dump),
    )

    total_converted = 0
    total_skipped = 0
    for source, banner, subdir, converter in jobs:
        if not source:
            continue
        print(banner)
        n_converted, n_skipped = converter(source, output_base / subdir, args.dry_run)
        print(f" ✓ {n_converted} converted, {n_skipped} skipped")
        total_converted += n_converted
        total_skipped += n_skipped

    if args.dry_run:
        print(f"\n[DRY RUN] Total: {total_converted} files would be created")
    else:
        print(f"\nTotal: {total_converted} files created")
        print(f"\nOutput: {output_base}/")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment