Created
February 17, 2021 22:01
-
-
Save matze/7d899f6a8caebcd43eb9198167ac77ac to your computer and use it in GitHub Desktop.
Script to rewrite YAML frontmatter to TOML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import re | |
from pathlib import Path | |
from typing import List | |
from enum import Enum | |
def to_dir_path(arg: str) -> Path:
    """Convert a command-line argument into a directory path, creating it if absent.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        arg: Path given on the command line.

    Returns:
        ``Path(arg)``, which is guaranteed to be an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``arg`` exists but is not a directory,
            or if the directory cannot be created.
    """
    path = Path(arg)

    if path.exists():
        # Reject anything that is not a directory (regular files, FIFOs, ...).
        if not path.is_dir():
            raise argparse.ArgumentTypeError(f"{arg} must be directory")
        return path

    try:
        # parents=True lets nested paths such as "out/posts" be created in
        # one go; exist_ok=True tolerates a concurrent creation.
        path.mkdir(parents=True, exist_ok=True)
    except OSError as err:
        # argparse only turns ArgumentTypeError/TypeError/ValueError into a
        # usage message; any other exception would surface as a traceback.
        raise argparse.ArgumentTypeError(
            f"cannot create directory {arg}: {err}"
        ) from err

    return path
class ReadState(Enum):
    """Position of the line reader within a Markdown file."""

    # Nothing has been read yet; the first line decides what follows.
    START = 1

    # Inside the block delimited by the front-matter marker lines.
    FRONT_MATTER = 2

    # Regular document content.
    BODY = 3
class Rewriter:
    """Iterate over a Markdown file, yielding its lines with rewritten markup.

    The following conversions are applied:

    * ``---`` front-matter delimiters become ``+++``.
    * Front-matter lines of the form ``key: value`` whose key is listed in
      ``keep`` become ``key = "value"``; every other front-matter line is
      dropped.
    * ``~~~lang`` / ``~~~`` code fences become triple-backtick fences.
    * ``{% image NAME %}`` lines become ``{{ image(path="/images/NAME") }}``.

    All other body lines are yielded unchanged.

    Args:
        filename: Markdown file to read; it is opened immediately.
        keep: Front-matter keys to carry over into the output.

    The instance is single-pass: the underlying file is closed once iteration
    finishes (or the generator is discarded), and iterating again yields
    nothing.
    """

    # Marker lines are compared with the trailing newline removed so that a
    # marker on the final line of a file without a terminating newline is
    # still recognised.
    YAML_MARKER = "---"
    TOML_MARKER = "+++\n"
    CODE_END_MARKER = "~~~"

    # Compiled once for the class instead of on every iteration.
    YAML_EXPR = re.compile(r"^(\w+): (.+)$")
    CODE_START_EXPR = re.compile(r"^~~~\s*(\w+)$")
    IMAGE_TAG = re.compile(r"{% image (.+) %}$")

    def __init__(self, filename: Path, keep: List[str]):
        self.fp = filename.open("r")
        self.state = ReadState.START
        self.keep = keep

    def __iter__(self):
        # A second pass over an already-consumed instance yields nothing.
        if self.fp.closed:
            return

        # The context manager closes the file when the input is exhausted or
        # when the generator is closed early.
        with self.fp:
            for line in self.fp:
                text = line.rstrip("\n")

                if self.state == ReadState.START:
                    if text == self.YAML_MARKER:
                        self.state = ReadState.FRONT_MATTER
                        yield self.TOML_MARKER
                        continue
                    # No front matter: the first line is already body content
                    # and must be processed below rather than discarded.
                    self.state = ReadState.BODY

                if self.state == ReadState.FRONT_MATTER:
                    if text == self.YAML_MARKER:
                        self.state = ReadState.BODY
                        yield self.TOML_MARKER
                    else:
                        entry = self._convert_entry(text)
                        if entry is not None:
                            yield entry
                else:
                    yield self._convert_body_line(line, text)

    def _convert_entry(self, text):
        """Return the TOML line for a kept ``key: value`` entry, else ``None``."""
        match = self.YAML_EXPR.match(text)
        if match is None or match.group(1) not in self.keep:
            return None
        return f"{match.group(1)} = {self._toml_string(match.group(2))}\n"

    @staticmethod
    def _toml_string(value):
        """Render a YAML scalar's source text as a TOML basic string."""
        if len(value) >= 2 and value[0] == value[-1] == "\"":
            # Already double-quoted: passed through untouched.
            # NOTE(review): assumes the YAML escapes used are also valid in
            # TOML (true for \" \\ \n \t; not for e.g. \x41 or \/).
            return value
        if len(value) >= 2 and value[0] == value[-1] == "'":
            # YAML single-quoted scalar: '' is the only escape sequence.
            value = value[1:-1].replace("''", "'")
        # Backslashes first, so the ones added for quotes are not doubled.
        escaped = value.replace("\\", "\\\\").replace("\"", "\\\"")
        return f"\"{escaped}\""

    def _convert_body_line(self, line, text):
        """Translate code fences and image tags; return other lines as-is."""
        if (match := self.CODE_START_EXPR.match(text)) is not None:
            return f"```{match.group(1)}\n"
        if (match := self.IMAGE_TAG.match(text)) is not None:
            return f"{{{{ image(path=\"/images/{match.group(1)}\") }}}}\n"
        if text == self.CODE_END_MARKER:
            return "```\n"
        return line
if __name__ == "__main__":
    # Command-line entry point: rewrite every "*.md" file found directly in
    # --input and write the result under the same file name in --output.
    parser = argparse.ArgumentParser()
    # to_dir_path is passed directly; a lambda wrapper adds nothing.
    parser.add_argument("--input", type=to_dir_path, required=True)
    parser.add_argument("--output", type=to_dir_path, required=True)
    # Front-matter keys that should survive the conversion.
    parser.add_argument("--keep", nargs="*", default=[])
    args = parser.parse_args()

    # Opening an output file for writing truncates it; if both options point
    # at the same directory that would wipe each source file before it is
    # read.
    if args.input.samefile(args.output):
        parser.error("--input and --output must be different directories")

    for filename in args.input.glob("*.md"):
        # The context manager flushes and closes each output file promptly
        # instead of leaving that to interpreter shutdown.
        with (args.output / filename.name).open("w") as output:
            output.writelines(Rewriter(filename, args.keep))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment