Skip to content

Instantly share code, notes, and snippets.

@shon
Created July 13, 2025 16:20
Show Gist options
  • Save shon/247004a6bfbba47fe2cccbfcdf6fc394 to your computer and use it in GitHub Desktop.
Save shon/247004a6bfbba47fe2cccbfcdf6fc394 to your computer and use it in GitHub Desktop.
Export Github / Markdown wiki to Outline (getoutline.com)
#!/usr/bin/env python3
"""outline_wiki_export.py
Export (push) a local GitHub‑Wiki‑style folder (Markdown + images)
to an Outline (getoutline.com) workspace via its public API.
"""
from __future__ import annotations
import argparse
import mimetypes
import os
import re
import sys
import time
from pathlib import Path
import requests
from tqdm import tqdm
# Path appended to the workspace host URL to form the base of every API call.
API_SUFFIX = "/api"
class OutlineClient:
    """Small client for the Outline HTTP API.

    Every API call is a JSON ``POST`` to ``<host>/api/<method>``. The helper
    methods return the ``data`` member of the response envelope.
    """

    # Number of attempts made for one call while the server answers HTTP 429.
    MAX_RETRIES = 5
    # Seconds to wait after a 429 that carries no numeric Retry-After header.
    DEFAULT_RETRY_WAIT = 60
    # Per-request timeout (seconds) so a stalled connection cannot hang the run.
    REQUEST_TIMEOUT = 60

    def __init__(self, host: str, api_key: str):
        """Create a session that sends *api_key* as a bearer token to *host*."""
        self.base = host.rstrip("/") + API_SUFFIX
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {api_key}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        })

    def _post(self, method: str, payload: dict) -> dict:
        """POST *payload* to the API *method* and return the response ``data``.

        HTTP 429 responses are retried up to ``MAX_RETRIES`` attempts, waiting
        for the ``Retry-After`` value when it is a plain number of seconds and
        ``DEFAULT_RETRY_WAIT`` otherwise.

        Raises:
            RuntimeError: on a non-2xx status, on a response whose ``ok`` flag
                is false, or when every attempt was rate limited.
        """
        url = f"{self.base}/{method}"
        for attempt in range(self.MAX_RETRIES):
            r = self.session.post(url, json=payload, timeout=self.REQUEST_TIMEOUT)
            if r.status_code == 429:
                if attempt == self.MAX_RETRIES - 1:
                    # No attempt left: fail now instead of sleeping first.
                    break
                retry_after = r.headers.get("Retry-After")
                if retry_after and retry_after.isdigit():
                    wait = int(retry_after)
                else:
                    wait = self.DEFAULT_RETRY_WAIT
                print(f"Rate limit hit. Retrying in {wait}s...")
                time.sleep(wait)
                continue
            if not r.ok:
                raise RuntimeError(f"{method} → {r.status_code}: {r.text}")
            body = r.json()
            if not body.get("ok", False):
                raise RuntimeError(f"{method} error: {body}")
            return body["data"]
        raise RuntimeError(f"{method} failed after retries due to rate limiting")

    def get_or_create_collection(self, name: str, description: str = "") -> str:
        """Return the id of the collection called *name*, creating it if absent.

        Names are compared case-insensitively.
        """
        # NOTE(review): only the first page of results is inspected; confirm
        # whether the server paginates this listing for large workspaces.
        existing = self._post("collections.list", {"query": name})
        wanted = name.lower()
        for col in existing:
            if col["name"].lower() == wanted:
                return col["id"]
        col = self._post("collections.create", {"name": name, "description": description})
        return col["id"]

    def create_document(self, title: str, text: str, collection_id: str,
                        parent_id: str | None = None, publish: bool = True) -> dict:
        """Create a document and return the API's ``data`` object for it.

        *parent_id*, when given, nests the new document under that document.
        """
        payload = {
            "title": title,
            "text": text,
            "collectionId": collection_id,
            "publish": publish,
        }
        if parent_id:
            payload["parentDocumentId"] = parent_id
        return self._post("documents.create", payload)

    def update_document(self, doc_id: str, text: str, publish: bool = True):
        """Replace the text of the document identified by *doc_id*."""
        self._post("documents.update", {"id": doc_id, "text": text, "publish": publish})

    def upload_attachment(self, path: Path, document_id: str | None = None) -> str:
        """Upload the file at *path* as an attachment and return its URL.

        The API is first asked for an upload URL and form fields; the file is
        then sent to that URL as a multipart form.

        Raises:
            RuntimeError: if the ``attachments.create`` call fails.
            requests.HTTPError: if the file upload itself is rejected.
        """
        mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
        meta = {
            "name": path.name,
            "contentType": mime,
            "size": path.stat().st_size,
        }
        if document_id:
            meta["documentId"] = document_id
        resp = self._post("attachments.create", meta)
        upload_url = resp["uploadUrl"]
        fields = resp["form"]
        with path.open("rb") as fh:
            files = {"file": (path.name, fh, mime)}
            # Sent outside the session so the JSON Content-Type and bearer
            # token headers are not attached to the multipart upload.
            s3 = requests.post(upload_url, data=fields, files=files,
                               timeout=self.REQUEST_TIMEOUT)
        s3.raise_for_status()
        return resp["attachment"]["url"]
# Markdown image: ![alt](target "optional title"); group 1 is the text in parentheses.
IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# Markdown inline link: [label](target). The lookbehind keeps the "[alt](src)"
# tail of an image from being treated as a page link.
MD_LINK_RE = re.compile(r"(?<!!)\[([^\]]+)\]\(([^)]+)\)")
# Targets that point outside the wiki and must never be mapped to a page.
_EXTERNAL_LINK_RE = re.compile(r"^(?:[a-z][a-z0-9+.-]*://|//|mailto:|tel:)", re.IGNORECASE)


def rewrite_markdown(md: str, working_dir: Path, client: OutlineClient) -> str:
    """Upload locally stored images referenced in *md* and point the links at them.

    Args:
        md: Markdown source text.
        working_dir: Directory against which relative image paths are resolved.
        client: Object whose ``upload_attachment(path)`` returns the hosted URL.

    Returns:
        The Markdown with every image target that resolves to an existing file
        replaced by its uploaded URL. Other images are left untouched.
    """
    # One upload per distinct file, however many times it is referenced.
    uploaded: dict[Path, str] = {}

    def _sub_img(match):
        whole = match.group(0)
        # The target may be followed by a quoted title; keep only the path.
        raw = match.group(1).split(" ")[0]
        local_path = (working_dir / raw).resolve()
        if not local_path.is_file():
            return whole
        if local_path not in uploaded:
            uploaded[local_path] = client.upload_attachment(local_path)
        # Splice the URL in at the target's position only, so alt text that
        # happens to contain the same path is not rewritten as well.
        start = match.start(1) - match.start(0)
        return whole[:start] + uploaded[local_path] + whole[start + len(raw):]

    return IMG_RE.sub(_sub_img, md)


def fix_links_in_home(content: str, url_map: dict[str, tuple[str, str]]) -> str:
    """Rewrite links to wiki pages in *content* as ``/doc/<slug>-<urlId>`` links.

    Args:
        content: Markdown source text.
        url_map: Lower-cased file stem -> ``(document title, urlId)``.

    Returns:
        The Markdown with links whose target names a known page rewritten.
        Image links, external URLs and unknown targets are left unchanged.
        A ``#fragment`` or quoted link title on a rewritten link is preserved.
    """
    def _replace_md_links(match):
        label, target = match.group(1), match.group(2)
        link = target.split(" ")[0]
        if _EXTERNAL_LINK_RE.match(link):
            return match.group(0)
        page, hash_sep, fragment = link.partition("#")
        base = os.path.splitext(os.path.basename(page))[0].lower()
        if base not in url_map:
            return match.group(0)
        title, urlid = url_map[base]
        slug = re.sub(r"\s+", "-", title.lower())
        # Whatever followed the path (e.g. ' "title"') is carried over verbatim.
        trailer = target[len(link):]
        return f"[{label}](/doc/{slug}-{urlid}{hash_sep}{fragment}{trailer})"

    return MD_LINK_RE.sub(_replace_md_links, content)
def gather_markdown(root: Path):
    """Return every ``*.md`` file below *root* (recursively) in sorted order.

    Sorting makes the upload order, and the choice among several files with
    the same stem, independent of the order the filesystem lists entries in.
    Directories whose name ends in ``.md`` are excluded.
    """
    return sorted(p for p in root.rglob("*.md") if p.is_file())
def pretty_title(path: Path) -> str:
    """Derive a document title from the file name of *path*.

    Runs of ``-`` and ``_`` in the stem become single spaces and the first
    character of every word is upper-cased. The rest of each word keeps the
    author's casing: ``str.title()`` would turn "what's" into "What'S" and
    "2fa" into "2Fa".
    """
    words = re.sub(r"[-_]+", " ", path.stem).split()
    return " ".join(word[:1].upper() + word[1:] for word in words)
def export_repo(repo_path: Path, client: OutlineClient, collection_name: str):
    """Upload every Markdown file under *repo_path* into one Outline collection.

    The file whose stem is "home" (case-insensitive), if any, is created first
    and every other file becomes its child document. Once all documents exist,
    the Home document is updated so its links to other pages point at the
    created documents.

    Args:
        repo_path: Root directory of the local wiki checkout.
        client: API client used for all uploads.
        collection_name: Collection to export into (created when missing).
    """
    collection_id = client.get_or_create_collection(collection_name)
    # Lower-cased file stem -> (document title, urlId). Files sharing a stem
    # in different folders overwrite each other here.
    file_to_urlid: dict[str, tuple[str, str]] = {}

    md_files = gather_markdown(repo_path)
    home_path = next((p for p in md_files if p.stem.lower() == "home"), None)
    other_files = [p for p in md_files if p != home_path]

    # Upload Home.md first so the remaining pages can be nested under it.
    home_doc_id: str | None = None
    home_md: str | None = None
    if home_path:
        title = pretty_title(home_path)
        raw_md = home_path.read_text(encoding="utf-8")
        home_md = rewrite_markdown(raw_md, home_path.parent, client)
        doc = client.create_document(title, home_md, collection_id)
        home_doc_id = doc["id"]
        file_to_urlid[home_path.stem.lower()] = (title, doc["urlId"])

    # Upload other files with Home as parent.
    for md_file in tqdm(other_files, desc="Uploading", unit="file"):
        raw_md = md_file.read_text(encoding="utf-8")
        processed_md = rewrite_markdown(raw_md, md_file.parent, client)
        title = pretty_title(md_file)
        doc = client.create_document(title, processed_md, collection_id, parent_id=home_doc_id)
        file_to_urlid[md_file.stem.lower()] = (title, doc["urlId"])

    # Fix the links starting from the already processed Home text. Re-reading
    # the file here would replace the uploaded image URLs with local paths.
    if home_doc_id and home_md is not None:
        updated = fix_links_in_home(home_md, file_to_urlid)
        client.update_document(home_doc_id, updated, publish=True)
def main():
    """Parse the command line and export the given wiki folder to Outline.

    Exits with an error message when the repo path is not a directory or the
    export fails, and with status 130 when interrupted by the user.
    """
    parser = argparse.ArgumentParser(description="Export a GitHub wiki folder to Outline")
    parser.add_argument("repo", type=Path, help="Path to local wiki repository")
    parser.add_argument("--api-key", required=True, help="Outline API key")
    parser.add_argument("--collection", help="Target collection name")
    parser.add_argument("--host", default="https://app.getoutline.com", help="Outline base URL")
    args = parser.parse_args()

    if not args.repo.is_dir():
        sys.exit("Provided repo path is not a directory")

    # Resolve first: the stem of "." or "wiki/.." would otherwise be empty or
    # meaningless as a default collection name.
    collection = args.collection or args.repo.resolve().stem
    client = OutlineClient(args.host, args.api_key)
    try:
        export_repo(args.repo, client, collection)
    except KeyboardInterrupt:
        print("\nInterrupted by user.")
        # An interrupted export is incomplete; do not report success.
        sys.exit(130)
    except (RuntimeError, OSError) as exc:
        # RuntimeError: API-level failures raised by the client. OSError: file
        # access errors and (as subclasses) the HTTP library's network errors.
        sys.exit(f"Export failed: {exc}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment