Skip to content

Instantly share code, notes, and snippets.

@chreke
Last active October 12, 2024 06:55
Show Gist options
  • Save chreke/1c3bcc6c3b38684c27e89cafcfd0d173 to your computer and use it in GitHub Desktop.
Save chreke/1c3bcc6c3b38684c27e89cafcfd0d173 to your computer and use it in GitHub Desktop.
Cohost Export
import json
from pathlib import Path
import argparse
"""
This is a very simple script to extract raw Markdown files from a Cohost export.
Feel free to modify as needed!
Usage:
Given that you've extracted your Cohost export to a directory like "export-123-12345"
you can run this script like this:
python cohost.py export-123-12345
The script will output a "posts" folder in the same directory, containing the Markdown
of your posts (both drafts and published posts).
"""
parser = argparse.ArgumentParser(
prog="Cohost export",
description="Extracts the raw Markdown files from a Cohost export.",
)
parser.add_argument("source", help="The unzipped Cohost export directory")
args = parser.parse_args()
output_dir = Path("./posts")
export_dir = Path(args.source)
for user_dir in (export_dir / "project").iterdir():
user_output_dir = output_dir / (user_dir.parts[-1])
for subdir in ["published", "draft"]:
posts_dir = Path(user_dir / "posts") / subdir
user_output_subdir = user_output_dir / subdir
user_output_subdir.mkdir(parents=True, exist_ok=True)
for post in posts_dir.iterdir():
post_json = post / "post.json"
with open(post_json) as f:
post_data = json.load(f)
title = post_data["headline"] or "Untitled"
slug = post.parts[-1]
content = "\n\n".join(
block["markdown"]["content"]
for block in post_data["blocks"]
if block["type"] == "markdown"
)
with open(str(user_output_subdir / slug) + ".md", "w") as f:
f.writelines([f"# {title}", "\n\n"])
f.write(content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment