Created
July 13, 2025 16:20
-
-
Save shon/247004a6bfbba47fe2cccbfcdf6fc394 to your computer and use it in GitHub Desktop.
Export Github / Markdown wiki to Outline (getoutline.com)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""outline_wiki_export.py

Export (push) a local GitHub-Wiki-style folder (Markdown + images)
to an Outline (getoutline.com) workspace via its public API.
"""
from __future__ import annotations

import argparse
import mimetypes
import os
import re
import sys
import time
from pathlib import Path
from urllib.parse import unquote

import requests
from tqdm import tqdm
# Path segment appended to the Outline host URL to form the API base URL.
API_SUFFIX = "/api"
class OutlineClient: | |
def __init__(self, host: str, api_key: str): | |
self.base = host.rstrip("/") + API_SUFFIX | |
self.session = requests.Session() | |
self.session.headers.update({ | |
"Authorization": f"Bearer {api_key}", | |
"Accept": "application/json", | |
"Content-Type": "application/json", | |
}) | |
def _post(self, method: str, payload: dict) -> dict: | |
url = f"{self.base}/{method}" | |
retries = 5 | |
for attempt in range(retries): | |
r = self.session.post(url, json=payload) | |
if r.status_code == 429: | |
retry_after = r.headers.get("Retry-After") | |
wait = int(retry_after) if retry_after and retry_after.isdigit() else 60 | |
print(f"Rate limit hit. Retrying in {wait}s...") | |
time.sleep(wait) | |
continue | |
if not r.ok: | |
raise RuntimeError(f"{method} → {r.status_code}: {r.text}") | |
body = r.json() | |
if not body.get("ok", False): | |
raise RuntimeError(f"{method} error: {body}") | |
return body["data"] | |
raise RuntimeError(f"{method} failed after retries due to rate limiting") | |
def get_or_create_collection(self, name: str, description: str = "") -> str: | |
existing = self._post("collections.list", {"query": name}) | |
for col in existing: | |
if col["name"].lower() == name.lower(): | |
return col["id"] | |
col = self._post("collections.create", {"name": name, "description": description}) | |
return col["id"] | |
def create_document(self, title: str, text: str, collection_id: str, | |
parent_id: str | None = None, publish: bool = True) -> dict: | |
payload = { | |
"title": title, | |
"text": text, | |
"collectionId": collection_id, | |
"publish": publish, | |
} | |
if parent_id: | |
payload["parentDocumentId"] = parent_id | |
return self._post("documents.create", payload) | |
def update_document(self, doc_id: str, text: str, publish: bool = True): | |
self._post("documents.update", {"id": doc_id, "text": text, "publish": publish}) | |
def upload_attachment(self, path: Path, document_id: str | None = None) -> str: | |
mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream" | |
meta = { | |
"name": path.name, | |
"contentType": mime, | |
"size": path.stat().st_size, | |
} | |
if document_id: | |
meta["documentId"] = document_id | |
resp = self._post("attachments.create", meta) | |
upload_url = resp["uploadUrl"] | |
fields = resp["form"] | |
with path.open("rb") as fh: | |
files = {"file": (path.name, fh, mime)} | |
s3 = requests.post(upload_url, data=fields, files=files) | |
s3.raise_for_status() | |
return resp["attachment"]["url"] | |
# Matches a Markdown image ``![alt](target)``; group 1 is the raw target text
# between the parentheses.
IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# Matches an inline Markdown link ``[label](target)``; group 1 is the label and
# group 2 the target.  The ``(?<!!)`` lookbehind skips images, so rewriting
# links never touches the ``[alt](file)`` part of ``![alt](file)``.
MD_LINK_RE = re.compile(r"(?<!!)\[([^\]]+)\]\(([^)]+)\)")
def rewrite_markdown(md: str, working_dir: Path, client: OutlineClient) -> str:
    """Upload locally referenced images and point the Markdown at the uploads.

    For every image whose target resolves to an existing file relative to
    *working_dir*, the file is uploaded through ``client.upload_attachment``
    and the target is replaced by the returned URL.  Images whose target is
    not a local file (for example remote URLs) are left unchanged.

    Args:
        md: Markdown source text.
        working_dir: Directory that relative image paths are resolved against.
        client: Client used to upload attachments.

    Returns:
        The Markdown text with local image targets replaced by attachment URLs.
    """
    # One upload per distinct file, even when it is referenced several times.
    uploaded: dict[Path, str] = {}

    def _sub_img(match):
        whole = match.group(0)
        # The target may be followed by a title: ``(path "title")``.
        raw = match.group(1).split(" ")[0]
        if not raw:
            return whole
        # Try the literal target first, then its percent-decoded form
        # (``my%20pic.png`` -> ``my pic.png``).
        for candidate in dict.fromkeys((raw, unquote(raw))):
            local_path = (working_dir / candidate).resolve()
            if local_path.is_file():
                break
        else:
            return whole
        if local_path not in uploaded:
            uploaded[local_path] = client.upload_attachment(local_path)
        # Replace only the target span so alt text that happens to contain the
        # same path is preserved.
        start = match.start(1) - match.start(0)
        return whole[:start] + uploaded[local_path] + whole[start + len(raw):]

    return IMG_RE.sub(_sub_img, md)
def fix_links_in_home(content: str, url_map: dict[str, tuple[str, str]]) -> str:
    """Rewrite Markdown links to known pages into Outline ``/doc/`` links.

    A link is rewritten when the lower-cased file stem of its target is a key
    of *url_map*; every other link is returned unchanged.  Despite the name,
    nothing here is specific to the Home page.

    Args:
        content: Markdown text whose links should be rewritten.
        url_map: Maps a lower-cased file stem to ``(title, urlId)``.

    Returns:
        The text with matching links replaced by ``/doc/<slug>-<urlId>``.
    """
    def _replace_md_links(match):
        label, link = match.group(1), match.group(2)
        # Ignore an optional link title (``Page "Title"``) and a ``#fragment``
        # when identifying the page.  The fragment is not carried over.
        # NOTE(review): assumes GitHub heading anchors do not map onto Outline
        # heading anchors -- confirm.
        parts = link.split()
        target = parts[0].partition("#")[0] if parts else ""
        base = os.path.splitext(os.path.basename(target))[0].lower()
        if base in url_map:
            title, urlid = url_map[base]
            slug = re.sub(r"\s+", "-", title.lower())
            return f"[{label}](/doc/{slug}-{urlid})"
        return match.group(0)

    return MD_LINK_RE.sub(_replace_md_links, content)
def gather_markdown(root: Path):
    """Return all ``*.md`` files below *root* (recursively) as a sorted list.

    Directories whose name ends in ``.md`` are skipped.  The result is sorted
    so the processing order does not depend on file-system enumeration order.
    """
    return sorted(p for p in root.rglob("*.md") if p.is_file())
def pretty_title(path: Path) -> str:
    """Derive a human-readable document title from a file name.

    Runs of ``-`` and ``_`` in the file stem become single spaces and the
    first character of each word is upper-cased.  The rest of each word is
    left as written, so acronyms (``REST-API``) and apostrophes
    (``what's-new``) survive, which ``str.title()`` would mangle.
    """
    words = re.sub(r"[-_]+", " ", path.stem).split()
    return " ".join(word[:1].upper() + word[1:] for word in words)
def export_repo(repo_path: Path, client: OutlineClient, collection_name: str):
    """Upload every Markdown file under *repo_path* into one Outline collection.

    The first file whose stem is ``home`` (case-insensitive) is created first
    as a top-level document; every other file becomes a child of it (or a
    top-level document when there is no Home file).  Once all documents exist,
    links between pages are rewritten to Outline document links and each
    document whose text changed is updated.

    Args:
        repo_path: Root folder of the local wiki checkout.
        client: Client used for all API calls.
        collection_name: Name of the collection to reuse or create.
    """
    collection_id = client.get_or_create_collection(collection_name)
    # Lower-cased file stem -> (title, urlId).  Files sharing a stem in
    # different folders overwrite each other here.
    file_to_urlid: dict[str, tuple[str, str]] = {}
    # (document id, text as uploaded) for the link-fixing pass below.
    created: list[tuple[str, str]] = []

    md_files = gather_markdown(repo_path)
    home_path = next((p for p in md_files if p.stem.lower() == "home"), None)
    other_files = [p for p in md_files if p != home_path]

    # Upload Home.md first
    home_doc_id: str | None = None
    if home_path:
        title = pretty_title(home_path)
        raw_md = home_path.read_text(encoding="utf-8")
        processed_md = rewrite_markdown(raw_md, home_path.parent, client)
        doc = client.create_document(title, processed_md, collection_id)
        home_doc_id = doc["id"]
        file_to_urlid[home_path.stem.lower()] = (title, doc["urlId"])
        created.append((doc["id"], processed_md))

    # Upload other files with Home as parent
    for md_file in tqdm(other_files, desc="Uploading", unit="file"):
        raw_md = md_file.read_text(encoding="utf-8")
        processed_md = rewrite_markdown(raw_md, md_file.parent, client)
        title = pretty_title(md_file)
        doc = client.create_document(title, processed_md, collection_id, parent_id=home_doc_id)
        file_to_urlid[md_file.stem.lower()] = (title, doc["urlId"])
        created.append((doc["id"], processed_md))

    # Fix links only now that every page has a urlId.  Start from the text
    # that was uploaded (not the raw file) so attachment URLs are kept.
    for doc_id, text in created:
        updated = fix_links_in_home(text, file_to_urlid)
        if updated != text:
            client.update_document(doc_id, updated, publish=True)
def main():
    """Parse command-line arguments and run the export.

    The API key is taken from ``--api-key`` or, when that is not given, from
    the ``OUTLINE_API_KEY`` environment variable.  The collection name
    defaults to the stem of the resolved repository folder name.

    Exits with an error message when the repository path is not a directory
    or the export raises ``RuntimeError``, and with status 130 on Ctrl-C.
    """
    # Allows keeping the key out of the process list and shell history.
    env_key = os.environ.get("OUTLINE_API_KEY") or None

    parser = argparse.ArgumentParser(description="Export a GitHub wiki folder to Outline")
    parser.add_argument("repo", type=Path, help="Path to local wiki repository")
    parser.add_argument("--api-key", default=env_key, required=env_key is None,
                        help="Outline API key (default: $OUTLINE_API_KEY)")
    parser.add_argument("--collection", help="Target collection name")
    parser.add_argument("--host", default="https://app.getoutline.com", help="Outline base URL")
    args = parser.parse_args()

    if not args.repo.is_dir():
        sys.exit("Provided repo path is not a directory")

    # Resolve first: ``Path(".").stem`` is an empty string.
    collection = args.collection or args.repo.resolve().stem
    client = OutlineClient(args.host, args.api_key)
    try:
        export_repo(args.repo, client, collection)
    except KeyboardInterrupt:
        print("\nInterrupted by user.")
        sys.exit(130)
    except RuntimeError as err:
        sys.exit(f"Export failed: {err}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment