Converts each article's JSON metadata (info.json) into a YAML front-matter header prepended to its readme.md
[flake8]
# E501: line too long
# E701: multiple statements on one line
ignore = E501,E701
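This lint configuration is a separate file in the gist, not part of the script below. flake8 reads a [flake8] section like this from setup.cfg, tox.ini, or a .flake8 file in the project; silencing E501 and E701 presumably accommodates the script's long lines and its deliberate one-line compound statements such as "if 'links' in d: del d['links']".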
import argparse
import json
import os
import yaml

EXPECTED_FIELDS = {
    'draft': bool,
    'type': basestring,
    'title': basestring,
    'subtitle': basestring,
    'abstract': basestring,
    'excerpt': basestring,
    'keywords': basestring,
    'authors': list,
    'translators': list,
    'completed_date': basestring,
    'last_updated': basestring,
    'pwg': list,
    'pta': list,
    'related_links': list,
    'related_content': list,
    'series': list,
    'social-media-meta': list,
    'primary_tag': basestring,
    'tags': list,
    'services': list,
    'components': list,
    'runtimes': list,
}

parser = argparse.ArgumentParser()
parser.add_argument('path')
args = parser.parse_args()

path = os.path.abspath(args.path)
if not os.path.isdir(path):
    raise SystemExit('path expected to be a directory path: %s' % path)

# This is intended to be run from the root of the how-to repository.
for root, dirs, files in os.walk(path):
    # Skip image directories, etc.
    if 'info.json' not in files or 'readme.md' not in files: continue

    with open(os.path.join(root, 'info.json'), 'r') as f: d = json.load(f)

    # Delete old attributes that are not expected in the new output.
    if '_comment_tags' in d: del d['_comment_tags']
    if '_comment_edit' in d: del d['_comment_edit']
    if '_comment_date_format' in d: del d['_comment_date_format']
    if 'links' in d: del d['links']
    if 'html' in d: del d['html']

    # Create a slug based on the directory name.
    d['slug'] = os.path.basename(root)
    print(d['slug'])

    # Clean up the description, ensure there's a period at the end, and dupe it
    # as an "excerpt".
    if d['description']: d['description'] = d['description'].strip()
    if d['description'] and d['description'][-1:] != '.': d['description'] = d['description'] + '.'
    d['abstract'] = d['excerpt'] = d['description']
    del d['description']

    # Set the completed date and last updated date to be the old date.
    d['completed_date'] = d['last_updated'] = d['date']
    del d['date']

    # Add new fields.
    d.setdefault('subtitle', '')
    d.setdefault('series', [])
    d.setdefault('pta', [])
    d.setdefault('runtimes', [])
    d.setdefault('keywords', '')
    d.setdefault('related_content', [])
    d.setdefault('primary_tag', '')
    d.setdefault('related_links', [])
    d.setdefault('type', 'howto')
    d.setdefault('translators', [])
    d.setdefault('tags', [])
    d.setdefault('social-media-meta', [])
    d.setdefault('pwg', [])
    d.setdefault('authors', [])
    d.setdefault('components', [])
    d.setdefault('services', [])

    # Merge and sort tags.
    if 'tags_deployments' in d:
        d['tags'].extend(d['tags_deployments'])
        del d['tags_deployments']
    if 'tags_technology' in d:
        d['tags'].extend(d['tags_technology'])
        del d['tags_technology']
    if 'tags_industry' in d:
        d['tags'].extend(d['tags_industry'])
        del d['tags_industry']
    d['tags'] = sorted(d['tags'])

    # Ensure the list of "author" [sic] is actually a list, even if it's just
    # one author... and then fix the field name.
    if not isinstance(d['author'], list): d['author'] = [d['author']]
    d['authors'] = d['author']
    del d['author']

    # Ensure that required fields are present and of the expected type.
    for field, typ in EXPECTED_FIELDS.items():
        if field not in d:
            raise Exception('%s: Missing field %s' % (root, field))
        if not isinstance(d[field], typ):
            raise Exception('%s: Field %s is of unexpected type: %s' % (root, field, type(d[field])))

    # Render a YAML string.
    s = yaml.dump(yaml.load(json.dumps(d)), default_flow_style=False, explicit_start=True, width=999)

    # Re-write the readme file as YAML + Markdown.
    with open(os.path.join(root, 'readme.md'), 'r') as f: r = f.read()
    with open(os.path.join(root, 'readme.md'), 'w') as f: f.write(s + '---\n\n' + r.lstrip())

    # Delete the old metadata file.
    os.remove(os.path.join(root, 'info.json'))
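For reference, here is a minimal standalone sketch (not part of the script; the metadata values are invented) of the yaml.dump call the script relies on and of how the resulting YAML is joined with the original Markdown body. It skips the json.dumps/yaml.load normalization round trip for brevity and runs under either Python 2 or 3, whereas the script itself targets Python 2 (it type-checks against basestring).

import yaml

# Illustrative metadata only; real info.json files carry many more fields.
d = {
    'title': 'Example how-to',
    'type': 'howto',
    'authors': ['Jane Doe'],
    'tags': ['cloud-servers', 'linux'],
}

# Same dump options as the script: block style, an explicit leading '---',
# and a very wide line width so long scalars are not wrapped.
s = yaml.dump(d, default_flow_style=False, explicit_start=True, width=999)

# The script appends a closing '---' delimiter, a blank line, and then the
# original readme.md body.
print(s + '---\n\n' + '# Example how-to\n\nArticle body...')

Because explicit_start=True already emits the opening '---' and PyYAML sorts mapping keys alphabetically by default, each rewritten readme.md begins with a '---' ... '---' front-matter block (here: authors, tags, title, type) followed by the untouched Markdown content.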