Last active
October 12, 2024 06:55
-
-
Save chreke/1c3bcc6c3b38684c27e89cafcfd0d173 to your computer and use it in GitHub Desktop.
Cohost Export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Extract the raw Markdown of your posts from a Cohost export.

This is a very simple script; feel free to modify it as needed!

Usage:
    Given that you've extracted your Cohost export to a directory like
    "export-123-12345", you can run this script like this:

        python cohost.py export-123-12345

    The script creates a "posts" folder in the current working directory,
    containing one Markdown file per post (both drafts and published posts),
    laid out as posts/<project>/<published|draft>/<post-folder-name>.md
"""

import argparse
import json
from pathlib import Path

# Sub-folders of "<project>/posts" that are scanned for posts.
POST_STATES = ("published", "draft")


def render_post(post_data: dict) -> str:
    """Return the Markdown document for a single parsed ``post.json``.

    The document is a level-1 heading holding the post's headline
    ("Untitled" when the headline is empty or absent), a blank line, and the
    content of every block whose type is "markdown", separated by blank
    lines. Blocks of any other type are ignored.
    """
    title = post_data.get("headline") or "Untitled"
    content = "\n\n".join(
        block["markdown"]["content"]
        for block in post_data.get("blocks") or []
        if block["type"] == "markdown"
    )
    return f"# {title}\n\n{content}"


def export_posts(export_dir, output_dir) -> list:
    """Write one ``.md`` file per post found under ``export_dir / "project"``.

    Args:
        export_dir: The unzipped Cohost export directory.
        output_dir: Directory that receives ``<project>/<state>/<slug>.md``.

    Returns:
        The paths of the Markdown files that were written.

    Raises:
        FileNotFoundError: If ``export_dir / "project"`` does not exist.
    """
    export_dir = Path(export_dir)
    output_dir = Path(output_dir)
    written = []

    for user_dir in sorted((export_dir / "project").iterdir()):
        # Stray files next to the project folders are not projects.
        if not user_dir.is_dir():
            continue
        for state in POST_STATES:
            posts_dir = user_dir / "posts" / state
            target_dir = output_dir / user_dir.name / state
            target_dir.mkdir(parents=True, exist_ok=True)

            # A project may have no drafts (or no published posts) at all.
            if not posts_dir.is_dir():
                continue

            for post in sorted(posts_dir.iterdir()):
                post_json = post / "post.json"
                if not post_json.is_file():
                    continue
                # Be explicit about UTF-8: the platform default encoding is
                # not UTF-8 everywhere and would garble non-ASCII posts.
                with open(post_json, encoding="utf-8") as f:
                    post_data = json.load(f)

                # Append ".md" to the folder name instead of using
                # with_suffix(), which would cut the name at its last dot.
                target = target_dir / f"{post.name}.md"
                target.write_text(render_post(post_data), encoding="utf-8")
                written.append(target)

    return written


def main(argv=None) -> None:
    """Parse the command line and export into ``./posts``."""
    parser = argparse.ArgumentParser(
        prog="Cohost export",
        description="Extracts the raw Markdown files from a Cohost export.",
    )
    parser.add_argument("source", help="The unzipped Cohost export directory")
    args = parser.parse_args(argv)

    export_dir = Path(args.source)
    if not (export_dir / "project").is_dir():
        parser.error(f'"{export_dir}" does not contain a "project" directory')

    export_posts(export_dir, Path("./posts"))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment