Created
August 29, 2013 01:07
-
-
Save pwc3/6373225 to your computer and use it in GitHub Desktop.
Python script to extract Markdown from a VoodooPad file used for static blog generation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import codecs | |
import glob | |
import os | |
import plistlib | |
import re | |
import sys | |
from datetime import datetime | |
def parse_args(argv):
    """Parse the command-line arguments of the export script.

    Args:
        argv: List of argument strings, or None to read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``verbose`` (bool), ``vpdoc_file`` and
        ``output_dir`` (both strings) attributes.
    """
    arguments = sys.argv[1:] if argv is None else argv

    cli = argparse.ArgumentParser(description="")
    cli.add_argument('-v', '--verbose', action='store_true', default=False)
    cli.add_argument('vpdoc_file', metavar='SRC_FILE',
                     help="input .vpdoc filename")
    cli.add_argument('output_dir', metavar='DST_DIR',
                     help="output directory")
    return cli.parse_args(arguments)
def generate_file_pairs(vp_root):
    """Yield ``(source_file, plist_file)`` path pairs for a VoodooPad document.

    Every entry of ``<vp_root>/pages`` is searched for ``*.plist`` files. A
    pair is produced only when a file with the same path minus the
    ``.plist`` extension also exists next to the plist.

    Args:
        vp_root: Path to the .vpdoc directory; it must contain ``pages``.

    Yields:
        Tuples ``(source_path, plist_path)``.

    Raises:
        OSError: If ``<vp_root>/pages`` cannot be listed.
    """
    extension = '.plist'
    pages_dir = os.path.join(vp_root, 'pages')
    for subdir in os.listdir(pages_dir):
        pattern = os.path.join(pages_dir, subdir, '*' + extension)
        for plist in glob.glob(pattern):
            # Strip only the trailing extension. str.replace() would also
            # remove '.plist' from directory names earlier in the path and
            # make the existence check below look at the wrong location.
            src_file = plist[:-len(extension)]
            if os.path.exists(src_file):
                yield (src_file, plist)
def _read_plist(plist_filename):
    """Parse a property-list file with whichever plistlib API is available."""
    load = getattr(plistlib, 'load', None)
    if load is None:
        # Older interpreters (Python 2) only provide the filename-based reader.
        return plistlib.readPlist(plist_filename)
    # plistlib.readPlist was removed in Python 3.9; load() wants a binary file.
    with open(plist_filename, 'rb') as plist_fh:
        return load(plist_fh)


def generate_metadata(file_pair_generator):
    """Yield a metadata dict for every page that is marked for publishing.

    A page is emitted when its plist attribute ``meta.vpstatic.publish`` is
    truthy and a date is available, either the ``meta.pubdate`` attribute
    (format ``YYYY-MM-DD HH:MM:SS``) or the plist's ``createdDate``.

    Args:
        file_pair_generator: Iterable of ``(source_filename, plist_filename)``.

    Yields:
        Dicts with the keys ``src_filename``, ``display_name``, ``date``,
        ``attributes`` and ``plist``.

    Raises:
        ValueError: If a published page has a non-empty ``meta.pubdate`` that
            does not match the expected format.
    """
    for source_filename, plist_filename in file_pair_generator:
        plist = _read_plist(plist_filename)
        attributes = plist.get('attributes', {})

        # Filter on the publish flag before parsing dates so that a malformed
        # pubdate on an unpublished page cannot abort the whole export.
        if not attributes.get('meta.vpstatic.publish', False):
            continue

        pubdate = attributes.get('meta.pubdate', None)
        if pubdate:
            pubdate = datetime.strptime(pubdate, '%Y-%m-%d %H:%M:%S')

        # An explicit publication date wins over the creation timestamp.
        date = pubdate or plist.get('createdDate', None)
        if not date:
            continue

        yield {
            'src_filename': source_filename,
            'display_name': plist.get('displayName', None),
            'date': date,
            'attributes': attributes,
            'plist': plist,
        }
def generate_dst_filename(metadata_generator):
    """Add a ``dst_filename`` entry to each metadata dict and pass it on.

    The filename is the page's ``display_name`` run through
    ``title_to_filename`` with ``.md`` appended. Each dict is updated in
    place and the same object is yielded.

    Args:
        metadata_generator: Iterable of dicts that carry ``display_name``.

    Yields:
        The incoming dicts, each with ``dst_filename`` set.
    """
    for entry in metadata_generator:
        # The name is derived from the title alone; a date-prefixed variant
        # ('%Y-%m-%d_<title>.md' built from entry['date']) is not used.
        entry['dst_filename'] = title_to_filename(entry['display_name']) + '.md'
        yield entry
def make_slug(title):
    """Return *title* lower-cased with spaces and commas turned into ``_``.

    Hyphens and all other characters are kept as they are.
    """
    underscored = re.sub(r'[ ,]', '_', title)
    return underscored.lower()
def title_to_filename(title):
    """Return a lower-cased file name stem derived from a page title.

    Spaces, commas, hyphens and forward slashes are each replaced by ``_``.
    The slash is included because the result is joined onto the output
    directory; a title such as ``2013/08 Notes`` or ``../x`` would otherwise
    point into a different directory instead of naming a file.

    Args:
        title: The page title string.

    Returns:
        The converted, lower-cased string (no extension added).
    """
    return re.sub(r'[ ,/-]', '_', title).lower()
def main(argv=None):
    """Export every published page of a VoodooPad document as Markdown.

    For each page produced by the generator pipeline a UTF-8 file is written
    to the output directory. It starts with ``title:``, ``date:`` and
    ``slug:`` header lines and one blank line, followed by the page source
    copied line by line.

    Output is produced with ``write()`` calls and single-argument
    ``print(...)`` so the function runs on Python 2 and Python 3 alike; the
    bytes written match what the former ``print >>file`` statements emitted.

    Args:
        argv: Optional argument list; None means ``sys.argv[1:]``.

    Returns:
        None, which ``sys.exit`` treats as a successful exit status.
    """
    options = parse_args(argv)
    if options.verbose:
        from pprint import pprint

    pages = generate_dst_filename(
        generate_metadata(
            generate_file_pairs(options.vpdoc_file)))

    for page in pages:
        src = page['src_filename']
        dst = os.path.join(options.output_dir, page['dst_filename'])
        title = page['display_name']

        if options.verbose:
            pprint(page['plist'])
            # NOTE(review): the original indentation was lost; these two
            # progress messages are assumed to belong to the verbose branch.
            print("Copy %s" % src)
            print(" to %s" % dst)

        with codecs.open(dst, 'w', 'utf-8') as out_fh:
            # Two spaces after each colon: one came from the literal and one
            # from the separator the print statement put between its items.
            out_fh.write("title:  %s\n" % (title,))
            out_fh.write("date:  %s\n" % (page['date'],))
            out_fh.write("slug:  %s\n" % (make_slug(title),))
            out_fh.write("\n")

            with codecs.open(src, 'r', 'utf-8') as in_fh:
                # Lines keep their own line endings, so copy them verbatim.
                for line in in_fh:
                    out_fh.write(line)
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment