-
-
Save gmemstr/0008acc9037dabafcf9f838f7018e5fd to your computer and use it in GitHub Desktop.
Migrating a Ghost blog database to Jekyll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf8 | |
""" | |
This script helps to import content from a Ghost blog database to Jekyll. | |
The database is expected to be running on a reachable MySQL host. | |
See the very end for DB configuration. | |
Quick Usage: | |
pip install -r requirements | |
python import.py | |
Posts will be written into _posts/<filename> | |
Authors will be listed to standard output. | |
""" | |
import MySQLdb | |
import yaml | |
# Lookup tables shared by the fetch_* functions below.
# tags:    str(tag id)  -> tag slug     (filled by fetch_tags)
# authors: str(user id) -> author slug  (filled by fetch_authors)
tags = dict()
authors = dict()
def fetch_authors():
    """Read all users and print them as a YAML ``authors`` mapping.

    Uses the module-level cursor ``c``. Besides printing, this records
    ``str(user id) -> slug`` in the module-level ``authors`` dict, which
    ``fetch_posts`` uses to resolve a post's author.
    """
    c.execute("""SELECT
        id, name, slug, bio, website, location, email
        FROM users ORDER BY id""")

    profiles = {}
    for user_id, name, slug, bio, website, location, email in c.fetchall():
        # Ids are stored as strings; fetch_posts looks them up the same way.
        authors[str(user_id)] = slug
        profiles[slug] = {
            "name": name,
            "bio": bio,
            "website": website,
            "location": location,
            "email": email,
        }

    print("Paste this part into your '_config.yml':")
    print(yaml.dump({"authors": profiles}, default_flow_style=False))
def fetch_tags():
    """Fill the module-level ``tags`` dict with ``str(tag id) -> slug``.

    Reads from the module-level cursor ``c``.
    """
    c.execute("""SELECT id, slug FROM tags ORDER BY id""")
    tags.update((str(tag_id), slug) for tag_id, slug in c.fetchall())
def clean_text(txt):
    """Fix codepage weirdness for all the Unicode characters we used.

    Applies a fixed list of (search, replacement) substitutions to ``txt``
    one after another and returns the result.
    """
    # Order matters: each substitution sees the output of the previous one,
    # and some search strings are prefixes of later ones.
    # NOTE(review): several pairs read as identical on both sides in this
    # listing, and some search strings may contain characters that are not
    # visible here. They are kept verbatim and in the original order --
    # confirm against the original file bytes before changing any of them.
    substitutions = (
        ("’", "’"),
        ("‘", "‘"),
        ("–", "–"),
        ("—", "–"),
        ("―", "—"),
        ("“", "\""),
        ("â€", "\""),
        ("â–ˆ", "█"),
        ("â–‹", "▋"),
        ("â–", "▍"),
        ("â–Ž", "▎"),
        ("â–", "▏"),
        ("â–Š", "▊"),
        ("â–‰", "▉"),
        ("✅", "✅"),
        ("„", "„"),
        ("…", "…"),
        ("Â ", ""),
        ("ä", "ä"),
        ("ö", "ö"),
        ("ü", "ü"),
        ("é", "é"),
    )
    for broken, fixed in substitutions:
        txt = txt.replace(broken, fixed)
    return txt
def fetch_posts():
    """Write every published post to ``_posts/<date>-<slug>.md``.

    Uses the module-level cursors ``c`` (posts) and ``c2`` (tags of one
    post) and the ``authors`` / ``tags`` lookup tables, so ``fetch_authors``
    and ``fetch_tags`` must have run first. Each file consists of a YAML
    front matter block (title, date, categories, author) followed by the
    cleaned-up post text.

    Raises:
        KeyError: if a post references an author id or tag id that is not
            present in ``authors`` / ``tags``.
    """
    # Local import so this function carries its own dependency.
    import os

    c.execute("""SELECT
        id, title, slug, plaintext, meta_title, meta_description,
        published_at, updated_at, author_id
        FROM posts WHERE status='published' ORDER BY id""")
    posts = c.fetchall()

    # Create the output directory up front; otherwise the first open()
    # below fails when "_posts" does not exist yet.
    if posts and not os.path.isdir("_posts"):
        os.makedirs("_posts")

    for entry in posts:
        author = authors[str(entry[8])]
        # published_at is rendered as a string; its first 10 characters are
        # used as the date prefix of the file name.
        date = str(entry[6])[0:10]
        filename = date + "-" + entry[2] + ".md"
        path = "_posts/" + filename

        c2.execute("SELECT tag_id FROM posts_tags WHERE post_id=%s", (entry[0],))
        mytags = [tags[str(t[0])] for t in c2.fetchall()]

        # frontmatter
        frontmatter = {}
        frontmatter["title"] = clean_text(entry[1])
        frontmatter["date"] = str(entry[6]) + " +0000"
        frontmatter["categories"] = mytags
        frontmatter["author"] = author
        md = "---\n" + yaml.dump(frontmatter) + "---\n\n"

        # text cleanup
        # A NULL plaintext column arrives as None; treat it as an empty body
        # instead of crashing on .replace().
        text = entry[3] or ""
        text = text.replace("/content/images/", "/assets/")
        text = clean_text(text)
        md += text

        # Write-only access is enough; the file is never read back.
        with open(path, "w") as markdownfile:
            markdownfile.write(md)
if __name__ == "__main__":
    # DB configuration: adjust host, user, password and database name here.
    db = MySQLdb.connect(host="localhost", user="ghost", passwd="password", db="ghost")
    try:
        # The fetch_* functions read these two cursors as module-level
        # globals, so the names must stay exactly "c" and "c2".
        c = db.cursor()
        c2 = db.cursor()
        c.execute("SET NAMES utf8")
        c.execute("SET CHARSET utf8")
        # Tags and authors first: fetch_posts resolves ids through them.
        fetch_tags()
        fetch_authors()
        fetch_posts()
    finally:
        # Release the connection even if one of the steps above fails.
        db.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MySQL-python==1.2.5 | |
PyYAML==3.12 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment