jbfriedrich · November 13, 2018 12:14
diff --git a/convert_posts_to_markdown.py b/convert_posts_to_markdown.py
 #!/usr/bin/env python3
 """
 Convert HTML posts extracted via jq into Markdown-formatted files (one file per post, named after the post's slug)
 """

 import json
 import html2text
 import sys
 import argparse

 if __name__ == "__main__":
    # Script entry point: read a JSON list of posts and write one Markdown
    # file per post (named "<slug>.md") into the current working directory.

    # Local import: only this entry point needs it (escaping post titles).
    from html import escape

    # Parse the command line. --file is mandatory: without it there is
    # nothing to convert, and open(None) would fail with a TypeError instead
    # of a readable usage message.
    parser = argparse.ArgumentParser(
        prog='convert_posts_to_markdown.py'
    )
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        required=True,
        help='Filename of Ghost 2.x export'
    )
    args = parser.parse_args()
    ghost_export = args.file

    # Configure the HTML -> Markdown converter: keep hyperlinks and set
    # body_width to 0 so long lines are not re-wrapped.
    h = html2text.HTML2Text()
    h.ignore_links = False
    h.body_width = 0
    #h.single_line_break = True

    # The input is expected to be a JSON array of objects that each provide
    # 'title', 'slug' and 'content' keys. Read it explicitly as UTF-8 so the
    # result does not depend on the platform's default encoding.
    with open(ghost_export, 'r', encoding='utf-8') as f:
        posts = json.load(f)

    for post in posts:
        title = post['title']
        slug = post['slug']
        content = post['content']

        filename = '{}.md'.format(slug)
        # The title is plain text while the content is already HTML: escape
        # the title so characters such as '<' or '&' are not parsed as markup.
        fcontent = '<h1>{}</h1>{}'.format(escape(str(title), quote=False), content)
        mcontent = h.handle(fcontent)

        # Write UTF-8 explicitly so non-ASCII post text survives on any OS.
        with open(filename, 'w', encoding='utf-8') as mdown:
            mdown.write(mcontent)
diff --git a/extract_posts.sh b/extract_posts.sh
 #!/bin/bash
 # Extract post information via jq command line json processor https://stedolan.github.io/jq/

 # Usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>
 # Abort with a usage message when either argument is missing.
 ghost_backup="${1:?usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>}"
 posts_extract="${2:?usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>}"

 # Collect every post of the export into one JSON array of
 # {title, slug, date, content} objects. jq reads the backup file directly
 # (no cat needed) and both paths are quoted so that names containing spaces
 # or glob characters are passed through unchanged.
 jq '[.db[].data.posts[] | {title: .title, slug: .slug, date: .created_at, content: .html}]' "${ghost_backup}" > "${posts_extract}"
diff --git a/remove_empty_lines.sh b/remove_empty_lines.sh
 #!/bin/bash
 # Remove two empty lines at the beginning of the file

 for file in *.md
 do
    # When nothing matches, the glob stays as the literal string "*.md"; skip it.
    [ -e "${file}" ] || continue

    # Work from a backup copy so the original can be restored on failure.
    mv -v -- "${file}" "${file}.old"

    # Delete empty lines, but only within the first two lines of the file.
    if gsed '1,2{/^$/d}' "${file}.old" > "${file}"
    then
        rm -v -- "${file}.old"
    else
        # gsed failed or is not installed: put the untouched original back
        # instead of leaving a truncated file behind.
        echo "gsed failed for ${file}; restoring original" >&2
        mv -v -- "${file}.old" "${file}"
    fi
 done
	#!/usr/bin/env python3
	"""
	Convert HTML posts extracted via jq into Markdown-formatted files (one file per post, named after the post's slug)
	"""

	import json
	import html2text
	import sys
	import argparse

	if __name__ == "__main__":
	    # Script entry point: read a JSON list of posts and write one Markdown
	    # file per post (named "<slug>.md") into the current working directory.

	    # Local import: only this entry point needs it (escaping post titles).
	    from html import escape

	    # Parse the command line. --file is mandatory: without it there is
	    # nothing to convert, and open(None) would fail with a TypeError
	    # instead of a readable usage message.
	    parser = argparse.ArgumentParser(
	        prog='convert_posts_to_markdown.py'
	    )
	    parser.add_argument(
	        '-f',
	        '--file',
	        type=str,
	        required=True,
	        help='Filename of Ghost 2.x export'
	    )
	    args = parser.parse_args()
	    ghost_export = args.file

	    # Configure the HTML -> Markdown converter: keep hyperlinks and set
	    # body_width to 0 so long lines are not re-wrapped.
	    h = html2text.HTML2Text()
	    h.ignore_links = False
	    h.body_width = 0
	    #h.single_line_break = True

	    # The input is expected to be a JSON array of objects that each
	    # provide 'title', 'slug' and 'content' keys. Read it explicitly as
	    # UTF-8 so the result does not depend on the platform default.
	    with open(ghost_export, 'r', encoding='utf-8') as f:
	        posts = json.load(f)

	    for post in posts:
	        title = post['title']
	        slug = post['slug']
	        content = post['content']

	        filename = '{}.md'.format(slug)
	        # The title is plain text while the content is already HTML:
	        # escape the title so '<' or '&' are not parsed as markup.
	        fcontent = '<h1>{}</h1>{}'.format(escape(str(title), quote=False), content)
	        mcontent = h.handle(fcontent)

	        # Write UTF-8 explicitly so non-ASCII post text survives on any OS.
	        with open(filename, 'w', encoding='utf-8') as mdown:
	            mdown.write(mcontent)
	#!/bin/bash
	# Extract post information via jq command line json processor https://stedolan.github.io/jq/

	# Usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>
	# Abort with a usage message when either argument is missing.
	ghost_backup="${1:?usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>}"
	posts_extract="${2:?usage: extract_posts.sh <ghost-backup.json> <posts-extract.json>}"

	# Collect every post of the export into one JSON array of
	# {title, slug, date, content} objects. The pipes inside the jq filter must
	# be plain "|" characters (a backslash-escaped pipe is not valid here).
	# jq reads the backup file directly and both paths are quoted so that names
	# containing spaces or glob characters are passed through unchanged.
	jq '[.db[].data.posts[] | {title: .title, slug: .slug, date: .created_at, content: .html}]' "${ghost_backup}" > "${posts_extract}"
	#!/bin/bash
	# Remove two empty lines at the beginning of the file

	for file in *.md
	do
	    # When nothing matches, the glob stays as the literal string "*.md"; skip it.
	    [ -e "${file}" ] || continue

	    # Work from a backup copy so the original can be restored on failure.
	    mv -v -- "${file}" "${file}.old"

	    # Delete empty lines, but only within the first two lines of the file.
	    if gsed '1,2{/^$/d}' "${file}.old" > "${file}"
	    then
	        rm -v -- "${file}.old"
	    else
	        # gsed failed or is not installed: put the untouched original back
	        # instead of leaving a truncated file behind.
	        echo "gsed failed for ${file}; restoring original" >&2
	        mv -v -- "${file}.old" "${file}"
	    fi
	done