@itstauq · Last active February 18, 2026
Convert ChatGPT and Claude Code conversation exports to clean markdown for importing into OpenClaw memory
#!/usr/bin/env python3
"""
Convert Claude Code and ChatGPT conversation exports to clean markdown.

Usage:
    python convert-ai-history.py --claude ~/.claude/projects --chatgpt ~/Downloads/conversations.json --output ~/openclaw/memory

    # Claude only
    python convert-ai-history.py --claude ~/.claude/projects --output ~/openclaw/memory

    # ChatGPT only
    python convert-ai-history.py --chatgpt ~/Downloads/conversations.json --output ~/openclaw/memory

    # Dry run (preview without writing)
    python convert-ai-history.py --claude ~/.claude/projects --output ~/openclaw/memory --dry-run

Outputs:
    {output}/claude-code-history/*.md
    {output}/chatgpt-history/*.md
"""
import argparse
import getpass
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
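

# Input shapes (inferred from the parsing code below; exact field names can vary
# between export versions, so treat this as a sketch rather than a spec):
#   - Claude Code: <projects dir>/<encoded-project-dir>/<session>.jsonl, one JSON
#     object per line with keys such as "type" ("summary" / "user" / "assistant"),
#     "summary", "cwd", "sessionId", "gitBranch", "timestamp", and "message"
#     carrying a "content" string or list of blocks.
#   - ChatGPT: conversations.json, a list of conversations, each with "title",
#     "create_time", "default_model_slug", "current_node", and "mapping"
#     (node id -> {"message", "parent", ...}); the script walks parent links from
#     "current_node" back to the root to linearize the active branch.

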
def slugify(text, max_len=60):
    """Convert text to a filename-safe slug."""
    text = text.lower().strip()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[\s_]+', '-', text)
    text = re.sub(r'-+', '-', text).strip('-')
    return text[:max_len] or 'untitled'


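# Example (illustrative):
#   slugify("Debugging the auth flow (part 2)") -> "debugging-the-auth-flow-part-2"

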
def extract_text_from_content(content):
    """Extract readable text from assistant message content (list of blocks)."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = []
        for block in content:
            if isinstance(block, str):
                texts.append(block)
            elif isinstance(block, dict):
                if block.get('type') == 'text':
                    texts.append(block.get('text', ''))
                elif block.get('type') == 'tool_use':
                    name = block.get('name', 'tool')
                    inp = block.get('input', {})
                    # Summarize tool use concisely
                    if name in ('Edit', 'Write'):
                        fp = inp.get('file_path', inp.get('path', '?'))
                        texts.append(f"*[Used {name} on `{fp}`]*")
                    elif name == 'Bash':
                        cmd = inp.get('command', '?')
                        if len(cmd) > 120:
                            cmd = cmd[:120] + '...'
                        texts.append(f"*[Ran `{cmd}`]*")
                    elif name == 'Read':
                        fp = inp.get('file_path', inp.get('path', '?'))
                        texts.append(f"*[Read `{fp}`]*")
                    else:
                        texts.append(f"*[Used {name}]*")
                elif block.get('type') == 'tool_result':
                    pass  # skip tool results — too verbose
                # skip 'thinking' blocks
        return '\n\n'.join(t for t in texts if t.strip())
    return str(content)


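# Example (illustrative):
#   extract_text_from_content([
#       {"type": "text", "text": "Done."},
#       {"type": "tool_use", "name": "Bash", "input": {"command": "ls"}},
#   ])
#   -> "Done.\n\n*[Ran `ls`]*"

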
def clean_project_path(dirname):
    """Remove username-specific path prefixes from directory names."""
    username = re.escape(getpass.getuser())
    # Handle various path encodings from Claude Code
    # e.g., "-Users-alice-Projects-foo" -> "Projects/foo"
    patterns = [
        rf'^-Users-{username}-?',
        rf'^-home-{username}-?',
        r'^-Users-[^-]+-?',  # fallback for any username
        r'^-home-[^-]+-?',
    ]
    name = dirname
    for pattern in patterns:
        name = re.sub(pattern, '', name, flags=re.IGNORECASE)
    return name.replace('-', '/')


# ─── Claude Code JSONL ───────────────────────────────────────────────
def convert_claude_session(jsonl_path):
    """Convert a single Claude Code JSONL session to markdown."""
    entries = []
    with open(jsonl_path) as f:
        for line in f:
            line = line.strip()
            if line:
                try:
                    entries.append(json.loads(line))
                except json.JSONDecodeError:
                    continue
    if not entries:
        return None, None
    # Extract metadata
    summaries = [e['summary'] for e in entries if e.get('type') == 'summary']
    title = summaries[0] if summaries else 'Untitled Session'
    # Get project from first user entry
    project = None
    session_id = None
    timestamp = None
    git_branch = None
    for e in entries:
        if e.get('type') == 'user':
            cwd = e.get('cwd', '')
            project = project or (cwd.split('/')[-1] if cwd else None)
            session_id = session_id or e.get('sessionId')
            git_branch = git_branch or e.get('gitBranch')
            ts = e.get('timestamp')
            if ts and not timestamp:
                timestamp = ts
    # Parse date
    date_str = 'Unknown'
    if timestamp:
        try:
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            date_str = dt.strftime('%Y-%m-%d %H:%M UTC')
        except Exception:
            date_str = timestamp[:10] if len(timestamp) >= 10 else timestamp
    # Build markdown
    lines = [f"# {title}\n"]
    meta = []
    if project:
        meta.append(f"**Project:** {project}")
    if date_str:
        meta.append(f"**Date:** {date_str}")
    if git_branch:
        meta.append(f"**Branch:** {git_branch}")
    if session_id:
        meta.append(f"**Session:** `{session_id}`")
    if meta:
        lines.append(' | '.join(meta) + '\n')
    if len(summaries) > 1:
        lines.append("**Topics:** " + ' → '.join(summaries) + '\n')
    lines.append('---\n')
    # Conversation turns
    for e in entries:
        etype = e.get('type')
        if etype == 'user':
            content = e.get('message', {}).get('content', '')
            text = extract_text_from_content(content)
            if text.strip():
                lines.append(f"## User\n\n{text.strip()}\n")
        elif etype == 'assistant':
            content = e.get('message', {}).get('content', '')
            text = extract_text_from_content(content)
            if text.strip():
                lines.append(f"## Assistant\n\n{text.strip()}\n")
    md = '\n'.join(lines)
    # Generate filename
    date_prefix = date_str[:10] if date_str != 'Unknown' else ''
    slug = slugify(title)
    filename = f"{date_prefix}-{slug}.md" if date_prefix else f"{slug}.md"
    return filename, md


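# The returned filename follows "<YYYY-MM-DD>-<slugified-title>.md"; the date
# prefix is dropped when no timestamp could be parsed from the session.

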
def convert_claude_dump(dump_dir, output_dir, dry_run=False):
    """Convert all Claude Code sessions from a dump directory."""
    dump_path = Path(dump_dir).expanduser()
    out_path = Path(output_dir).expanduser()
    if not dump_path.exists():
        print(f" ✗ Claude dump not found: {dump_path}")
        return 0, 0
    if not dry_run:
        out_path.mkdir(parents=True, exist_ok=True)
    converted = 0
    skipped = 0
    for project_dir in sorted(dump_path.iterdir()):
        if not project_dir.is_dir():
            continue
        # Clean up project name (remove username-specific paths)
        project_name = clean_project_path(project_dir.name)
        for jsonl_file in sorted(project_dir.glob('*.jsonl')):
            filename, md = convert_claude_session(jsonl_file)
            if not filename or not md:
                skipped += 1
                continue
            # Prefix with project for uniqueness
            project_slug = slugify(project_name.split('/')[-1] if '/' in project_name else project_name, 20)
            final_name = f"{project_slug}--{filename}"
            out_file = out_path / final_name
            # Deduplicate
            counter = 1
            while out_file.exists():
                stem = final_name.rsplit('.', 1)[0]
                out_file = out_path / f"{stem}-{counter}.md"
                counter += 1
            if dry_run:
                print(f" [dry-run] {out_file.name}")
            else:
                out_file.write_text(md)
            converted += 1
    return converted, skipped


# ─── ChatGPT JSON ────────────────────────────────────────────────────
def convert_chatgpt_conversation(convo):
    """Convert a single ChatGPT conversation to markdown."""
    title = convo.get('title', 'Untitled')
    create_time = convo.get('create_time')
    model = convo.get('default_model_slug', '')
    convo_id = convo.get('id', convo.get('conversation_id', ''))
    # Parse date
    date_str = 'Unknown'
    if create_time:
        try:
            dt = datetime.fromtimestamp(create_time, tz=timezone.utc)
            date_str = dt.strftime('%Y-%m-%d %H:%M UTC')
        except Exception:
            pass
    # Build conversation tree → linear order
    mapping = convo.get('mapping', {})
    current = convo.get('current_node')
    # Walk from current_node back to root to get the path
    path = []
    node_id = current
    while node_id and node_id in mapping:
        path.append(node_id)
        node_id = mapping[node_id].get('parent')
    path.reverse()
    # Build markdown
    lines = [f"# {title}\n"]
    meta = []
    if date_str:
        meta.append(f"**Date:** {date_str}")
    if model:
        meta.append(f"**Model:** {model}")
    if convo_id:
        meta.append(f"**ID:** `{convo_id}`")
    if meta:
        lines.append(' | '.join(meta) + '\n')
    lines.append('---\n')
    for nid in path:
        node = mapping[nid]
        msg = node.get('message')
        if not msg:
            continue
        role = msg.get('author', {}).get('role', '')
        content = msg.get('content', {})
        parts = content.get('parts') or []
        if role == 'system':
            continue
        text_parts = []
        for part in parts:
            if isinstance(part, str) and part.strip():
                text_parts.append(part)
            elif isinstance(part, dict):
                # Could be image, code, etc
                ct = part.get('content_type', '')
                if ct == 'text' and part.get('text'):
                    text_parts.append(part['text'])
        text = '\n\n'.join(text_parts)
        if not text.strip():
            continue
        role_label = 'User' if role == 'user' else 'Assistant' if role == 'assistant' else role.title()
        lines.append(f"## {role_label}\n\n{text.strip()}\n")
    md = '\n'.join(lines)
    date_prefix = date_str[:10] if date_str != 'Unknown' else ''
    slug = slugify(title)
    filename = f"{date_prefix}-{slug}.md" if date_prefix else f"{slug}.md"
    return filename, md


def convert_chatgpt_dump(json_path, output_dir, dry_run=False):
    """Convert all ChatGPT conversations from export JSON."""
    json_path = Path(json_path).expanduser()
    out_path = Path(output_dir).expanduser()
    if not json_path.exists():
        print(f" ✗ ChatGPT export not found: {json_path}")
        return 0, 0
    if not dry_run:
        out_path.mkdir(parents=True, exist_ok=True)
    with open(json_path) as f:
        convos = json.load(f)
    converted = 0
    skipped = 0
    for convo in convos:
        filename, md = convert_chatgpt_conversation(convo)
        if not filename or not md or md.count('##') <= 1:
            skipped += 1
            continue
        out_file = out_path / filename
        counter = 1
        while out_file.exists():
            stem = filename.rsplit('.', 1)[0]
            out_file = out_path / f"{stem}-{counter}.md"
            counter += 1
        if dry_run:
            print(f" [dry-run] {out_file.name}")
        else:
            out_file.write_text(md)
        converted += 1
    return converted, skipped


# ─── Main ─────────────────────────────────────────────────────────────
def main():
    parser = argparse.ArgumentParser(
        description='Convert Claude Code and ChatGPT exports to markdown.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert both
  %(prog)s --claude ~/.claude/projects --chatgpt ~/Downloads/conversations.json -o ~/openclaw/memory

  # Claude only
  %(prog)s --claude ~/.claude/projects -o ~/openclaw/memory

  # ChatGPT only
  %(prog)s --chatgpt ~/Downloads/conversations.json -o ~/openclaw/memory

  # Preview without writing
  %(prog)s --claude ~/.claude/projects -o ~/openclaw/memory --dry-run
"""
    )
    parser.add_argument('--claude', metavar='DIR',
                        help='Path to Claude Code projects directory (e.g., ~/.claude/projects)')
    parser.add_argument('--chatgpt', metavar='FILE',
                        help='Path to ChatGPT conversations.json export')
    parser.add_argument('-o', '--output', metavar='DIR', required=True,
                        help='Output directory (will create claude-code-history/ and chatgpt-history/ subdirs)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Preview files that would be created without writing')
    args = parser.parse_args()
    if not args.claude and not args.chatgpt:
        parser.error('At least one of --claude or --chatgpt is required')
    output_base = Path(args.output).expanduser()
    total_converted = 0
    total_skipped = 0
    if args.claude:
        print("Converting Claude Code sessions...")
        claude_out = output_base / 'claude-code-history'
        c_conv, c_skip = convert_claude_dump(args.claude, claude_out, args.dry_run)
        print(f" ✓ {c_conv} converted, {c_skip} skipped")
        total_converted += c_conv
        total_skipped += c_skip
    if args.chatgpt:
        print("Converting ChatGPT conversations...")
        chatgpt_out = output_base / 'chatgpt-history'
        g_conv, g_skip = convert_chatgpt_dump(args.chatgpt, chatgpt_out, args.dry_run)
        print(f" ✓ {g_conv} converted, {g_skip} skipped")
        total_converted += g_conv
        total_skipped += g_skip
    print(f"\n{'[DRY RUN] ' if args.dry_run else ''}Total: {total_converted} files {'would be ' if args.dry_run else ''}created")
    if not args.dry_run:
        print(f"\nOutput: {output_base}/")


if __name__ == '__main__':
    main()
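
For reference, each converted session or conversation becomes a standalone markdown file shaped roughly like the sketch below. The structure mirrors the header, metadata, and turn formatting in the functions above; the title, project, date, branch, session ID, and message text are placeholders, not real output.

```markdown
# Fix login redirect loop

**Project:** webapp | **Date:** 2025-06-01 14:12 UTC | **Branch:** main | **Session:** `3f2a9c1e`

---

## User

The login page keeps redirecting back to itself after sign-in.

## Assistant

*[Read `src/auth/session.ts`]*

The redirect fires before the session cookie is written; moving the redirect after the cookie write should fix it.
```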