Skip to content

Instantly share code, notes, and snippets.

@maluta
Last active December 30, 2015 06:39
Show Gist options
  • Save maluta/7791137 to your computer and use it in GitHub Desktop.
Save maluta/7791137 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# wp2jekyll.py - convert a WordPress dump (MySQL) to "pure" text for the Jekyll platform
#
# author: tiago maluta
# e-mail: [email protected]
import MySQLdb
# Connection settings handed to MySQLdb.connect(); empty placeholders here.
db_server = ""
db_user = ""
db_pass = ""
db_name = ""

# Selects date, title and body of every published entry of type "post".
db_query = (
    'SELECT post_date, post_title, post_content FROM wp_posts '
    'WHERE post_status="publish" and post_type="post";'
)

# Notice inserted right after the front matter of each generated file.
msg = '<small>[disclaimer: generated automatically by wp2jekyll.py</small>\n\n'
def main():
    """Export every row returned by ``db_query`` through ``convert``.

    Opens a MySQL connection using the module-level ``db_server``,
    ``db_user``, ``db_pass`` and ``db_name`` settings, executes ``db_query``
    and passes each fetched row to ``convert``. The cursor and the
    connection are closed even when the query or a conversion raises; the
    exception itself is not swallowed.
    """
    db = MySQLdb.connect(db_server, db_user, db_pass, db_name)
    try:
        cursor = db.cursor()
        try:
            cursor.execute(db_query)
            # fetchone() returns None once the result set is exhausted,
            # which is the sentinel that ends the iteration.
            for row in iter(cursor.fetchone, None):
                convert(row)
        finally:
            cursor.close()
    finally:
        # Release the connection on both the success and the error path.
        db.close()
def _to_text(value):
    """Return *value* as text; UTF-8 byte strings are decoded, bad bytes dropped."""
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return value


def convert(data):
    """Write one post as a Jekyll ``YYYY-MM-DD-title.markdown`` file.

    data -- sequence ``(post_date, post_title, post_content)``.
            ``post_date`` must provide ``strftime``; title and content may
            be UTF-8 byte strings or already-decoded text.

    The file is created relative to the current working directory (an
    existing file with the same name is overwritten) and its name is
    printed. The file content is the front matter, the module-level
    ``msg`` notice and then the post content, encoded as UTF-8.
    """
    # Local import so the function does not rely on the file's import block.
    import io

    post_date = data[0]
    # Decode first, then collapse runs of whitespace in the title.
    post_title = " ".join(_to_text(data[1]).split())
    post_content = _to_text(data[2])

    # ':' is stripped from the title; presumably because a bare colon would
    # confuse the YAML front matter parser.
    jekyll_postheader = "---\nlayout: post\ntitle: %s\n---\n\n%s\n\n" % (
        post_title.replace(":", ""), msg)

    # avoid naming problem in fs
    slug = post_title.replace("/", "-")
    for unwanted in "!().?":
        slug = slug.replace(unwanted, "")

    # template: YYYY-MM-DD-title.markdown
    filename = (post_date.strftime("%Y-%m-%d") + "-"
                + slug.replace(" ", "-") + ".markdown")
    print(filename)

    # An explicit encoding avoids the implicit ASCII encode that made
    # non-ASCII posts fail; ``with`` closes the file even if a write raises.
    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(jekyll_postheader)
        f.write(post_content)
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment