Created
November 13, 2018 12:14
-
-
Save jbfriedrich/569ccf02faf0d754cf490bb6eb80ccea to your computer and use it in GitHub Desktop.
Extract post title, slug and content from a Ghost 2.x backup and import them into a write.as blog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Convert HTML posts that we extract via JQ into Markdown formatted files.

The input file is parsed as JSON and iterated as a sequence of post objects,
each providing the keys 'title', 'slug' and 'content'. One Markdown file is
written per post into the current working directory, named '<slug>.md', with
the post title rendered as a top-level heading.
"""
import argparse
import html
import json
import sys

import html2text

if __name__ == "__main__":
    # Parsing arguments
    parser = argparse.ArgumentParser(
        prog='convert_posts_to_markdown.py'
    )
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        # Without a file there is nothing to convert; fail with a usage
        # message instead of a TypeError from open(None).
        required=True,
        help='Filename of Ghost 2.x export'
    )
    args = parser.parse_args()
    ghost_export = args.file

    # Configure the HTML -> Markdown converter
    h = html2text.HTML2Text()
    h.ignore_links = False
    # A body width of 0 disables hard-wrapping of long lines
    h.body_width = 0
    #h.single_line_break = True

    # JSON is UTF-8 by specification; do not depend on the locale encoding
    with open(ghost_export, 'r', encoding='utf-8') as f:
        posts = json.load(f)

    for post in posts:
        title = post['title']
        slug = post['slug']
        # A null body would otherwise be rendered as the literal text "None"
        content = post['content'] or ''
        filename = '{}.md'.format(slug)
        # NOTE(review): assumes the title is plain text (not HTML) -- confirm.
        # Escaping keeps characters such as '<' and '&' from being parsed
        # as markup when the title is embedded in the heading.
        fcontent = '<h1>{}</h1>{}'.format(
            html.escape(title, quote=False),
            content
        )
        mcontent = h.handle(fcontent)
        with open(filename, 'w', encoding='utf-8') as mdown:
            mdown.write(mcontent)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Extract post information via jq command line json processor https://stedolan.github.io/jq/
#
# Usage: <script> <ghost_backup.json> <posts_extract.json>
#   $1  path of the Ghost backup (JSON) to read
#   $2  path of the file the extracted posts array is written to

# Both paths are mandatory; fail early with a usage message instead of an
# opaque redirect error.
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <ghost_backup.json> <posts_extract.json>" >&2
    exit 1
fi

ghost_backup="${1}"
posts_extract="${2}"

# Build one array of {title, slug, date, content} objects from every post in
# the backup. Expansions are quoted so paths containing spaces work.
jq '[.db[].data.posts[] | {title: .title, slug: .slug, date: .created_at, content: .html}]' "${ghost_backup}" > "${posts_extract}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Remove two empty lines at the beginning of the file
# (applied to every *.md file in the current directory; requires GNU sed
# installed as `gsed`)
for file in *.md
do
    # When nothing matches, the glob stays as the literal '*.md'; skip it
    [ -f "${file}" ] || continue

    mv -v -- "${file}" "${file}.old"
    # Delete lines 1 and 2 only if they are empty. The backup is removed only
    # after gsed succeeded; otherwise the original is restored so a missing
    # or failing gsed cannot wipe the file.
    if gsed '1,2{/^$/d}' -- "${file}.old" > "${file}"; then
        rm -v -- "${file}.old"
    else
        echo "gsed failed for ${file}; restoring original" >&2
        mv -v -- "${file}.old" "${file}"
    fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment