Skip to content

Instantly share code, notes, and snippets.

@seamustuohy
Last active July 14, 2018 22:11
Show Gist options
  • Select an option

  • Save seamustuohy/1f52b3ad74da0c39cd3b6ca97e427e63 to your computer and use it in GitHub Desktop.

Select an option

Save seamustuohy/1f52b3ad74da0c39cd3b6ca97e427e63 to your computer and use it in GitHub Desktop.
Snippet to update SAFETAG header metadata
sudo apt install python3-pip
pip3 install setuptools wheel
pip3 install pyaml

# Download the python script (safetag_metadata_convert.py) from this gist
# into the current directory before running the loops below.
SAFETAG_DIR="/home/user/SAFETAG"

# "methods" and "adids" keep their documents as *.md files directly inside
# the doctype folder.
declare -a doctypes=("methods" "adids")
for doctype in "${doctypes[@]}"; do
    for i in "${SAFETAG_DIR}"/en/"${doctype}"/*.md; do
        # An unmatched glob stays as the literal pattern; skip it instead of
        # handing a non-existent path to the converter.
        [[ -e "${i}" ]] || continue
        python3 safetag_metadata_convert.py -d "${i}" -t "${doctype}" -D
    done
done

# Exercises are different: each one is a folder holding an index.md.
# Paths are read NUL-delimited so folder names containing whitespace are not
# split into several words.
while IFS= read -r -d '' ex_folder; do
    index="${ex_folder}/index.md"
    if [[ -f "${index}" ]]; then
        python3 safetag_metadata_convert.py -d "${index}" -t exercises -D
    fi
done < <(find "${SAFETAG_DIR}"/en/exercises/* -type d -print0)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright © 2018 seamus tuohy, <code@seamustuohy.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the included LICENSE file for details.
import argparse
from pyaml import yaml
from yaml import SafeDumper
import re
import logging
# Configure the root logger at ERROR level; set_logging() may lower the level
# of this module's logger when --verbose or --debug is given.
logging.basicConfig(level=logging.ERROR)
# Module-level logger used by every function in this script.
log = logging.getLogger(__name__)
def main():
    """Rewrite the metadata header of the document named on the command line.

    Parses the CLI arguments, configures logging, reads the whole document,
    rebuilds its header with update_headers() and writes the result back to
    the same path.
    """
    args = parse_arguments()
    set_logging(args.verbose, args.debug)
    log.info("parsing {0}".format(args.doc))
    # Use UTF-8 explicitly rather than the locale-dependent default so that
    # non-ASCII text round-trips identically on every platform.
    with open(args.doc, "r", encoding="utf-8") as fd:
        text = fd.read()
    # The new content is built completely before the file is reopened for
    # writing, so a parsing failure cannot leave the document truncated.
    updated = update_headers(text, args.doctype)
    with open(args.doc, "w", encoding="utf-8") as fd:
        fd.write(updated)
def update_headers(text, doctype):
    """Return *text* with its metadata header replaced by a normalized one.

    A leading ``---`` line is skipped. Then ``key: value`` lines and their
    continuation lines (indented by four or more spaces, or starting with
    ``- ``) are collected until a blank line, a ``---``/``...`` line, or the
    first line that is not metadata. The collected values are completed by
    get_meta() for *doctype*, rendered by make_header() and placed in front
    of the remaining lines.

    Args:
        text: Full content of the markdown document.
        doctype: Document type, passed through to get_meta().

    Returns:
        The updated document as one string. The final newline of *text*,
        which ``splitlines()`` discards, is restored.
    """
    META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
    META_MORE_RE = re.compile(r'^([ ]{4,}|- )(?P<value>.*)')
    BEGIN_RE = re.compile(r'^-{3}(\s.*)?')
    END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?')
    lines = text.splitlines()
    meta = {}
    key = None
    # Index of the first line that still belongs to the body. Advancing an
    # index avoids the O(n) cost of popping from the front of the list for
    # every header line.
    pos = 1 if lines and BEGIN_RE.match(lines[0]) else 0
    while pos < len(lines):
        line = lines[pos]
        if line.strip() == '' or END_RE.match(line):
            pos += 1  # blank line or end of YAML header: drop it, done
            break
        m1 = META_RE.match(line)
        if m1:
            key = m1.group('key').strip()
            meta.setdefault(key, []).append(m1.group('value').strip())
        else:
            m2 = META_MORE_RE.match(line)
            if not (m2 and key):
                break  # no meta data: the line stays in the body
            # Add another line to existing key
            meta[key].append(m2.group('value').strip())
        pos += 1
    body = lines[pos:]
    log.debug("Existing Metadata : {0}".format(meta))
    meta = get_meta(doctype, meta)
    header = make_header(meta)
    updated_doc = '\n'.join(header + body)
    # splitlines() swallowed the terminating newline of the last body line;
    # put it back so the file keeps ending with a newline. With an empty body
    # the header's trailing "" entry already ends the text with a newline.
    if body and text.endswith('\n'):
        updated_doc += '\n'
    return updated_doc
def make_header(meta):
    """Render *meta* as the lines of a ``---`` delimited YAML header.

    Returns a list of strings: the opening delimiter, the block-style YAML
    dump of *meta* split into lines, the closing delimiter and one empty
    string.
    """
    fence = "---"
    yaml_text = yaml.dump(meta, default_flow_style=False, Dumper=SafeDumper)
    header_lines = [fence, *yaml_text.splitlines(), fence, ""]
    log.debug("Final Header Metadata : {0}".format(meta))
    return header_lines
def get_meta(doctype, meta=None):
    """Fill in every header *doctype* requires that *meta* does not define.

    Args:
        doctype: "methods", "exercises" or "adids"; any other value adds no
            default headers.
        meta: Dict mapping header name to a list of values. It is updated in
            place. When omitted, a fresh dict is created for each call.

    Returns:
        The metadata dict after it has been passed through clean_metadata().
    """
    # A literal ``{}`` default would be created once and shared by every
    # call, so headers added for one document would leak into the next.
    if meta is None:
        meta = {}
    defaults = {
        "methods": [
            "Authors",
            "Info_required",
            "Info_provided",
        ],
        "exercises": [
            "Authors",
            "Skills_required",
            "Approach",
            "Time_required_minutes",
            "Materials_required",
            "Org_size_under",
            "Remote_options",
        ],
        "adids": [  # curricula
            "Authors",
            "Skills_required",
            "Skills_trained",
        ],
    }
    for name in defaults.get(doctype, ()):
        # Set all to unknown by default; existing values are kept.
        meta.setdefault(name, ["unknown"])
    log.debug("merged metadata : {0}".format(meta))
    return clean_metadata(meta)
def clean_metadata(meta):
    """Remove empty-string values that creep into the metadata.

    Args:
        meta: Dict mapping header name to a list of string values. Both the
            dict and its lists are modified in place.

    Returns:
        The same *meta* dict. Every ``""`` value is removed, and a header
        left without any value is set to ``["unknown"]``.
    """
    for key, vals in meta.items():
        # list.remove() would only drop the first empty string; filter so
        # that all of them go. Slice assignment keeps the same list object.
        vals[:] = [val for val in vals if val != ""]
        # If we just removed all values set the value to unknown,
        # otherwise it writes '[]' as a name which is silly.
        if not vals:
            meta[key] = ["unknown"]
    log.debug("Cleaned metadata : {0}".format(meta))
    return meta
# Command Line Functions below this point
def set_logging(verbose=False, debug=False):
    """Lower the module logger's threshold according to the CLI flags.

    Args:
        verbose: When truthy, the logger level is set to INFO.
        debug: When truthy, the logger level is set to DEBUG; this takes
            precedence over *verbose*.

    With neither flag set the logger level is left untouched.
    """
    if debug:
        log.setLevel(logging.DEBUG)
    elif verbose:
        log.setLevel(logging.INFO)
def parse_arguments():
    """Parse the command line options of this script.

    Returns:
        argparse.Namespace with the attributes ``verbose``, ``debug``,
        ``doctype`` and ``doc``.
    """
    # The text must be passed as ``description``: given positionally it is
    # taken as ``prog`` and replaces the program name in the usage line.
    parser = argparse.ArgumentParser(
        description="Update markdown headers for SAFETAG docs")
    parser.add_argument("--verbose", "-v",
                        help="Turn verbosity on",
                        action='store_true')
    parser.add_argument("--debug", "-D",
                        help="Turn debug on",
                        action='store_true')
    parser.add_argument("--doctype", "-t",
                        help="document type",
                        choices=["adids", "methods", "exercises"])
    # Without a document there is nothing to open, so report a usage error
    # here instead of failing later when the path is opened.
    parser.add_argument("--doc", "-d",
                        help="path of document to update",
                        required=True)
    return parser.parse_args()
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment