Skip to content

Instantly share code, notes, and snippets.

@gmemstr
Forked from marians/import.py
Last active November 8, 2017 04:42
Show Gist options
  • Save gmemstr/0008acc9037dabafcf9f838f7018e5fd to your computer and use it in GitHub Desktop.
Save gmemstr/0008acc9037dabafcf9f838f7018e5fd to your computer and use it in GitHub Desktop.
Migrating a Ghost blog database to Jekyll
# coding: utf8
"""
This script helps to import content from a Ghost blog database to Jekyll.
The database is expected to be running on a reachable MySQL host.
See the very end for DB configuration.
Quick Usage:
pip install -r requirements
python import.py
Posts will be written into _posts/<filename>
Authors will be listed to standard output.
"""
import MySQLdb
import yaml
# Maps a tag id (as a string) to that tag's slug; filled by fetch_tags().
tags = {}
# Maps a user id (as a string) to that user's slug; filled by fetch_authors().
authors = {}
def fetch_authors():
    """Load all Ghost users, record id -> slug, and print them as YAML.

    Reads from the module-level cursor ``c``. Each user's slug is stored in
    the module-level ``authors`` mapping under the stringified user id, and
    an ``authors:`` section keyed by slug is printed to standard output so
    it can be pasted into Jekyll's ``_config.yml``.
    """
    c.execute(
        "SELECT\n"
        "id, name, slug, bio, website, location, email\n"
        "FROM users ORDER BY id"
    )

    profiles_by_slug = {}
    for user_id, name, slug, bio, website, location, email in c.fetchall():
        authors[str(user_id)] = slug
        profiles_by_slug[slug] = {
            "name": name,
            "bio": bio,
            "website": website,
            "location": location,
            "email": email,
        }

    print("Paste this part into your '_config.yml':")
    print(yaml.dump({"authors": profiles_by_slug}, default_flow_style=False))
def fetch_tags():
    """Fill the module-level ``tags`` mapping with tag id (string) -> slug.

    Reads every row of the ``tags`` table through the module-level cursor
    ``c``.
    """
    c.execute("""SELECT id, slug FROM tags ORDER BY id""")
    tags.update((str(tag_id), slug) for tag_id, slug in c.fetchall())
# Ordered (search, replacement) pairs applied one after another by
# clean_text(). Order matters: an earlier replacement can produce text that
# a later pair matches, so keep the sequence as it is.
#
# NOTE(review): several pairs read identically on both sides in this copy of
# the file. Presumably the left-hand sides were mis-encoded byte sequences
# that got normalized when the file was re-saved -- confirm against the
# original script before relying on them.
_TEXT_REPLACEMENTS = (
    ("’", "’"),
    ("‘", "‘"),
    ("–", "–"),
    ("—", "–"),
    ("―", "—"),
    ("“", "\""),
    ("”", "\""),
    ("â–ˆ", "█"),
    ("â–‹", "▋"),
    ("▍", "▍"),
    ("â–Ž", "▎"),
    ("▏", "▏"),
    ("â–Š", "▊"),
    ("â–‰", "▉"),
    ("✅", "✅"),
    ("„", "„"),
    ("…", "…"),
    # NOTE(review): this pattern shows up as a single blank character.
    # Deleting ordinary spaces would glue every word together, so it is
    # taken to be a non-breaking space and spelled out explicitly -- confirm.
    ("\u00a0", ""),
    ("ä", "ä"),
    ("ö", "ö"),
    ("ü", "ü"),
    ("é", "é"),
)


def clean_text(txt):
    """Fix codepage weirdness for all the Unicode characters we used.

    Applies every pair in ``_TEXT_REPLACEMENTS`` to ``txt``, in order, and
    returns the resulting string. ``txt`` must be a string; text that
    contains none of the search patterns is returned unchanged.
    """
    for broken, fixed in _TEXT_REPLACEMENTS:
        txt = txt.replace(broken, fixed)
    return txt
def fetch_posts():
    """Write every published Ghost post to ``_posts/`` as a Jekyll file.

    Reads posts through the module-level cursor ``c`` and each post's tag
    ids through ``c2``. A second cursor is used so the per-post tag query
    does not share a cursor with the post query. For every post a file
    ``_posts/<YYYY-MM-DD>-<slug>.md`` is written, made up of a YAML front
    matter block (title, date, categories, author) followed by the post's
    plain text passed through ``clean_text``.

    Relies on the module-level ``tags`` and ``authors`` mappings having been
    filled beforehand.

    Raises:
        KeyError: if a post refers to an author id or tag id that is not
            present in ``authors`` / ``tags``.
    """
    # Local import so this function brings its own dependency into scope.
    import os

    # open() does not create missing directories; make sure the target exists.
    if not os.path.isdir("_posts"):
        os.makedirs("_posts")

    c.execute(
        "SELECT\n"
        "id, title, slug, plaintext, meta_title, meta_description,\n"
        "published_at, updated_at, author_id\n"
        "FROM posts WHERE status='published' ORDER BY id"
    )
    for entry in c.fetchall():
        (post_id, title, slug, plaintext, _meta_title, _meta_description,
         published_at, _updated_at, author_id) = entry

        published = str(published_at)
        # The first ten characters of the timestamp string are used as the
        # date prefix of the file name.
        path = "_posts/" + published[0:10] + "-" + slug + ".md"

        c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (post_id,))
        post_tags = [tags[str(row[0])] for row in c2.fetchall()]

        frontmatter = {
            "title": clean_text(title),
            "date": published + " +0000",
            "categories": post_tags,
            "author": authors[str(author_id)],
        }

        # plaintext may be NULL in the database; treat that as an empty body
        # instead of failing on None.replace().
        text = (plaintext or "").replace("/content/images/", "/assets/")
        md = "---\n" + yaml.dump(frontmatter) + "---\n\n" + clean_text(text)

        # The file is only written, never read back, so plain "w" suffices.
        with open(path, "w") as markdownfile:
            markdownfile.write(md)
if __name__ == "__main__":
    # DB configuration: adjust host, user, password and database name here.
    db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
    try:
        # c and c2 are module-level names; the fetch_* functions use them
        # directly instead of taking a cursor argument.
        c = db.cursor()
        c2 = db.cursor()
        c.execute("SET NAMES utf8")
        c.execute("SET CHARSET utf8")
        # Tags and authors go first: fetch_posts() looks both up by id.
        fetch_tags()
        fetch_authors()
        fetch_posts()
    finally:
        # Release the connection even if one of the steps above fails.
        db.close()
MySQL-python==1.2.5
PyYAML==3.12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment