Last active
July 14, 2018 22:11
-
-
Save seamustuohy/1f52b3ad74da0c39cd3b6ca97e427e63 to your computer and use it in GitHub Desktop.
Snippet to update SAFETAG header metadata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install pip for Python 3, then the packaging helpers and the pyaml
# library that the metadata conversion script imports.
sudo apt install python3-pip
pip3 install setuptools wheel
pip3 install pyaml
# Download the python script from this GIST
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run safetag_metadata_convert.py over the English SAFETAG markdown documents
# so that each one gets an updated metadata header.
SAFETAG_DIR="/home/user/SAFETAG"

# Methods and adids are converted from the *.md files directly inside
# their doctype folder.
declare -a doctypes=("methods" "adids")
for doctype in "${doctypes[@]}"; do
    for i in "${SAFETAG_DIR}"/en/"${doctype}"/*.md; do
        # An unmatched glob is left as the literal pattern; skip it rather
        # than handing a non-existent path to the converter.
        [[ -e "${i}" ]] || continue
        python3 safetag_metadata_convert.py -d "${i}" -t "${doctype}" -D
    done
done

# Exercises are different: the document is the index.md inside each folder.
# Read NUL-delimited paths from find so that folder names containing
# whitespace are not split into several words.
while IFS= read -r -d '' ex_folder; do
    index="${ex_folder}/index.md"
    if [[ -f "${index}" ]]; then
        python3 safetag_metadata_convert.py -d "${index}" -t exercises -D
    fi
done < <(find "${SAFETAG_DIR}"/en/exercises/* -type d -print0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| # | |
| # Copyright © 2018 seamus tuohy, <code@seamustuohy.com> | |
| # | |
| # This program is free software: you can redistribute it and/or modify it | |
| # under the terms of the GNU General Public License as published by the Free | |
| # Software Foundation, either version 3 of the License, or (at your option) | |
| # any later version. | |
| # | |
| # This program is distributed in the hope that it will be useful, but WITHOUT | |
| # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| # FITNESS FOR A PARTICULAR PURPOSE. See the included LICENSE file for details. | |
| import argparse | |
| from pyaml import yaml | |
| from yaml import SafeDumper | |
| import re | |
| import logging | |
# Configure logging at ERROR level by default; set_logging() can lower the
# threshold of this module's logger when -v or -D is given.
logging.basicConfig(level=logging.ERROR)
log = logging.getLogger(__name__)
def main():
    """Rewrite the metadata header of the document named on the command line.

    Parses the command-line arguments, configures logging, reads the
    document, regenerates its header for the requested document type and
    writes the result back to the same path.
    """
    args = parse_arguments()
    set_logging(args.verbose, args.debug)
    log.info("parsing %s", args.doc)
    # Pin the encoding so that reading and writing do not depend on the
    # locale's default; this script itself is declared as UTF-8.
    with open(args.doc, "r", encoding="utf-8") as fd:
        text = fd.read()
    updated = update_headers(text, args.doctype)
    with open(args.doc, "w", encoding="utf-8") as fd:
        fd.write(updated)
def update_headers(text, doctype):
    """Return *text* with its metadata header rebuilt for *doctype*.

    Any metadata found at the top of the document is parsed into a
    ``{key: [values]}`` mapping, passed through ``get_meta`` to add the
    headers *doctype* is missing, and re-emitted by ``make_header`` in
    front of the rest of the document.

    Args:
        text: Full contents of the markdown document.
        doctype: Document type, handed on to ``get_meta``.

    Returns:
        The document text with the regenerated header in front. A trailing
        newline in *text* is preserved.
    """
    META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
    META_MORE_RE = re.compile(r'^([ ]{4,}|- )(?P<value>.*)')
    BEGIN_RE = re.compile(r'^-{3}(\s.*)?')
    END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?')
    lines = text.splitlines()
    meta = {}
    key = None
    # Walk the header by index instead of repeatedly popping the first
    # element, which shifts the whole list every time.
    pos = 0
    if lines and BEGIN_RE.match(lines[0]):
        pos = 1  # skip the opening '---'
    while pos < len(lines):
        line = lines[pos]
        if line.strip() == '' or END_RE.match(line):
            pos += 1  # the terminator itself is not part of the body
            break  # blank line or end of YAML header - done
        m1 = META_RE.match(line)
        if m1:
            key = m1.group('key').strip()
            value = m1.group('value').strip()
            meta.setdefault(key, []).append(value)
        else:
            m2 = META_MORE_RE.match(line)
            if not (m2 and key):
                break  # no meta data - leave this line in the body
            # Add another line to existing key
            meta[key].append(m2.group('value').strip())
        pos += 1
    # The regenerated header ends with its own blank separator line, so drop
    # blank lines at the start of the body; otherwise every run would add
    # one more blank line after the header.
    while pos < len(lines) and lines[pos].strip() == '':
        pos += 1
    body = lines[pos:]
    log.debug("Existing Metadata : {0}".format(meta))
    meta = get_meta(doctype, meta)
    header = make_header(meta)
    updated = '\n'.join(header + body)
    # splitlines()/join() drops the final newline; put it back if the
    # original document had one.
    if text.endswith('\n') and not updated.endswith('\n'):
        updated += '\n'
    return updated
def make_header(meta):
    """Build the header lines for *meta*.

    The mapping is dumped with ``yaml.dump`` (``default_flow_style=False``,
    ``SafeDumper``) and wrapped in ``---`` fences.

    Returns:
        A list of lines: the opening fence, the dumped YAML lines, the
        closing fence and a final empty string.
    """
    fence = "---"
    yaml_text = yaml.dump(meta, default_flow_style=False, Dumper=SafeDumper)
    header_lines = [fence] + yaml_text.splitlines() + [fence, ""]
    log.debug("Final Header Metadata : {0}".format(meta))
    return header_lines
def get_meta(doctype, meta=None):
    """Set all missing headers with proper metadata.

    Args:
        doctype: "methods", "exercises" or "adids"; any other value adds
            no default headers.
        meta: Mapping of header name to list of values. It is updated in
            place. When omitted, a fresh mapping is created for each call.

    Returns:
        The mapping returned by ``clean_metadata`` after the missing
        headers have been added.
    """
    # A literal ``{}`` default would be shared between calls and accumulate
    # the headers added below.
    if meta is None:
        meta = {}
    defaults = {
        "methods": [
            "Authors",
            "Info_required",
            "Info_provided",
        ],
        "exercises": [
            "Authors",
            "Skills_required",
            "Approach",
            "Time_required_minutes",
            "Materials_required",
            "Org_size_under",
            "Remote_options",
        ],
        "adids": [  # curricula
            "Authors",
            "Skills_required",
            "Skills_trained",
        ],
    }
    for name in defaults.get(doctype, []):
        # Set all to unknown by default
        meta.setdefault(name, ["unknown"])
    log.debug("merged metadata : {0}".format(meta))
    meta = clean_metadata(meta)
    return meta
def clean_metadata(meta):
    """Remove extra empty metadata that creeps into metadata.

    Every empty-string value is removed from each entry's list, in place.
    An entry left with no values is set to ``["unknown"]``.

    Args:
        meta: Mapping of header name to list of string values.

    Returns:
        The same mapping, cleaned.
    """
    for key, vals in meta.items():
        # list.remove("") would only drop the first empty string; filter
        # them all out while keeping the same list object.
        vals[:] = [val for val in vals if val != ""]
        # If we just removed all values set the value to unknown
        # otherwise it writes '[]' as a name which is silly
        if not vals:
            meta[key] = ["unknown"]
    log.debug("Cleaned metadata : {0}".format(meta))
    return meta
| # Command Line Functions below this point | |
def set_logging(verbose=False, debug=False):
    """Set this module's log level from the command-line flags.

    Args:
        verbose: When truthy, log at INFO level.
        debug: When truthy, log at DEBUG level; takes precedence over
            *verbose*.

    With neither flag set the level is left unchanged.
    """
    # Test truthiness rather than comparing against True.
    if debug:
        log.setLevel("DEBUG")
    elif verbose:
        log.setLevel("INFO")
def parse_arguments():
    """Parse the command-line options of the header update script.

    Returns:
        The ``argparse.Namespace`` with ``verbose``, ``debug``,
        ``doctype`` and ``doc`` attributes.

    Raises:
        SystemExit: Raised by argparse on invalid options or when
            ``--doc`` is missing.
    """
    # The first positional parameter of ArgumentParser is ``prog``; the
    # summary text has to be passed as ``description`` or it replaces the
    # program name in the usage line.
    parser = argparse.ArgumentParser(
        description="Update markdown headers for SAFETAG docs")
    parser.add_argument("--verbose", "-v",
                        help="Turn verbosity on",
                        action='store_true')
    parser.add_argument("--debug", "-D",
                        help="Turn debug on",
                        action='store_true')
    parser.add_argument("--doctype", "-t",
                        help="document type",
                        choices=["adids", "methods", "exercises"])
    # Without a document there is nothing to open, so reject the call here
    # with a usage message.
    parser.add_argument("--doc", "-d",
                        help="path of document to update",
                        required=True)
    args = parser.parse_args()
    return args
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment