|
#! /usr/bin/env python |
|
""" |
|
walk tree, replace .Rmd with processed .qmd |
|
""" |
|
from pathlib import Path |
|
import sys |
|
from ruamel.yaml import YAML |
|
import re |
|
|
|
# Shared ruamel.yaml instance: read() uses it to parse front matter and
# write() uses it to dump the metadata back out. typ='rt' selects ruamel's
# round-trip mode.
yaml = YAML(typ='rt')
|
|
|
|
|
def replace_internal_links(input_string):
    """
    Point internal markdown links at ``.qmd`` sources instead of ``.html``.

    A link is rewritten only when its target starts with ``/`` or ``./``
    and ends in ``.html``, optionally followed by a ``#fragment``. The
    fragment, if present, is carried over unchanged. Everything else in
    the string is returned as-is.

    Returns the rewritten string.
    """
    # The target and the fragment are matched with ``[^)]`` rather than ``.``
    # so a match can never extend past the closing parenthesis of its own
    # link. With ``.*?`` a line such as ``[a](/foo) (see page.html)`` was
    # rewritten even though the link itself does not end in ``.html``.
    pattern = r'\[([^\]]+)\]\((\.?/[^)]*?)(\.html)(#[^)]*)?\)'

    # \4 is the optional fragment; it expands to '' when absent.
    replacement = r'[\1](\2.qmd\4)'

    return re.sub(pattern, replacement, input_string)
|
|
|
|
|
def replace_math_delimiters(input_string):
    r"""
    Convert LaTeX-style math delimiters to dollar delimiters.

    * ``\( ... \)`` on a single line becomes ``$...$``.
    * ``\( ... \)`` spanning several lines becomes ``$$ ... $$``.
    * ``\[ ... \]`` becomes ``$$...$$``.

    Returns the converted string.
    """

    def replacement(match):
        text = match.group(1)
        if '\n' in text:
            # Multi-line match: promote to display math.
            return f'$$ {text} $$'
        # Single-line match. Pandoc only recognises ``$...$`` as inline math
        # when the delimiters touch the content, so ``\( x \)`` must become
        # ``$x$`` rather than ``$ x $``. Whitespace-only content is left
        # untouched so it cannot collapse into a bare ``$$``.
        return f'${text.strip() or text}$'

    # Inline math: \( ... \)
    math_pattern = r'\\\((.*?)\\\)'
    input_string = re.sub(math_pattern, replacement, input_string, flags=re.DOTALL)

    # Display math: \[ ... \]
    display_math_pattern = r'\\\[(.*?)\\\]'
    input_string = re.sub(display_math_pattern, r'$$\1$$', input_string, flags=re.DOTALL)

    return input_string
|
|
|
def read(fname):
    """
    Read a source file and split it into metadata and body text.

    Two header styles are recognised:

    * A YAML block opened by ``---`` on the first line and closed by a
      ``---`` or ``...`` line. It is parsed with the module-level ``yaml``
      object and its keys are lower-cased.
    * A plain preamble of ``key: value`` lines, which is skipped.

    Returns a ``(metadata, content)`` tuple: a dict and the body as a single
    string. An empty file yields ``({}, "")``.
    """
    with open(fname, 'r', encoding='utf8') as fp:
        lines = fp.readlines()

    if not lines:
        return {}, ""

    metadata = {}
    # Index of the first body line. It stays at len(lines), i.e. an empty
    # body, when the header runs to the end of the file. Tracking it
    # explicitly avoids relying on a loop variable that is unbound when the
    # file consists of the opening ``---`` line only.
    body_start = len(lines)

    # rstrip() tolerates trailing whitespace and a missing final newline.
    if lines[0].rstrip() == '---':  # YAML header
        to_parse = []
        for idx, line in enumerate(lines[1:], start=1):
            # When we find a terminator (`---` or `...`), stop; the
            # terminator itself belongs to neither header nor body.
            if line.rstrip() in ('---', '...'):
                body_start = idx + 1
                break
            to_parse.append(line)

        # An empty header parses to None; treat it as "no metadata".
        parsed = yaml.load("".join(to_parse)) or {}
        for key in parsed:
            metadata[key.lower()] = parsed[key]
    else:
        # NOTE(review): as before, the ``key: value`` preamble is skipped
        # and its values are NOT recorded in ``metadata``. Confirm whether
        # they should be; massage_one_file expects list-valued tags/images.
        for idx, line in enumerate(lines):
            if ':' in line:
                continue
            # First non-header line: drop it only if it is a blank
            # separator, otherwise it is already body text.
            body_start = idx if line.strip() else idx + 1
            break

    return metadata, "".join(lines[body_start:])
|
|
|
|
|
def write(fname, metadata, content):
    """
    Write ``content`` to ``fname`` preceded by a YAML front-matter block.

    The file is overwritten. ``metadata`` is dumped with the module-level
    ``yaml`` object between two ``---`` fence lines, and ``content`` follows
    the closing fence verbatim.
    """
    fence = '---\n'
    with open(fname, mode='w', encoding='utf8') as out:
        out.write(fence)
        yaml.dump(metadata, out)
        # Closing fence and body go out in one call; the bytes are the same.
        out.write(fence + content)
|
|
|
|
|
def massage_one_file(rmdname):
    """
    Convert one ``.Rmd`` file to a ``.qmd`` file with Quarto metadata.

    ``rmdname`` is a ``Path`` with at least two components. The output path
    is ``rmdname`` with its first component dropped and the suffix changed
    to ``.qmd``. Files whose stem starts with ``_index`` are special-cased:

    * they are written as ``<parent dir>.qmd``, or ``index.qmd`` when the
      file sits directly under the first component;
    * under ``tags`` the leading directory is renamed to ``_tags``;
    * outside ``tags`` they get a ``listing`` entry.

    Metadata tweaks: first of ``images`` -> ``image``, ``description`` ->
    ``subtitle``, ``modified`` (or else ``date``) -> ``date-modified``,
    ``tags`` -> ``categories`` with underscores replaced by spaces.

    Side effects: writes the ``.qmd``, prints a progress line, deletes the
    ``.Rmd`` and any sibling ``.html``, and moves sibling ``.yaml`` and
    ``.bib`` files to the path with the first component dropped.
    """
    stem = str(rmdname.stem)
    htmlname = rmdname.with_suffix('.html')
    yamlname = rmdname.with_suffix('.yaml')
    bibname = rmdname.with_suffix('.bib')
    is_listing = False
    is_tag = rmdname.parts[1] == 'tags'

    if stem.startswith('_index'):
        # ignore the path part, use the parent dirname as the base of a new file
        pathparts = rmdname.parts[1:-1]
        if pathparts:
            if is_tag:
                pathparts = ('_tags', *pathparts[1:])
            qmdname = Path(*pathparts).with_suffix('.qmd')
        else:
            qmdname = Path('index.qmd')
        is_listing = True
        indexpath = str(qmdname.stem)
    else:
        # Skip the first part and reconstruct the path
        qmdname = Path(*rmdname.parts[1:])

    qmdname = qmdname.with_suffix('.qmd')
    newyamlname = Path(*yamlname.parts[1:])
    newbibname = Path(*bibname.parts[1:])

    metadata, rmdcontent = read(rmdname)

    # An empty ``images:`` / ``tags:`` key parses to None, and a scalar value
    # parses to a plain string. Normalise both to a list so we neither crash
    # nor iterate a string character by character.
    images = metadata.get('images') or []
    if isinstance(images, str):
        images = [images]
    if images:
        metadata['image'] = images[0]

    if 'description' in metadata:
        # rename to 'subtitle'
        metadata['subtitle'] = metadata.pop('description')

    if 'modified' in metadata:
        metadata['date-modified'] = metadata.pop('modified')
    elif 'date' in metadata:
        metadata['date-modified'] = metadata['date']

    if 'tags' in metadata:
        tags = metadata.pop('tags') or []
        if isinstance(tags, str):
            tags = [tags]
        # rename to 'categories'
        metadata['categories'] = [s.replace("_", " ") for s in tags]

    if is_listing and not is_tag:
        metadata['listing'] = {
            'contents': indexpath,
            'feed': True,
        }

    qmdname.parent.mkdir(parents=True, exist_ok=True)
    qmdcontent = replace_internal_links(rmdcontent)
    qmdcontent = replace_math_delimiters(qmdcontent)
    write(qmdname, metadata, qmdcontent)
    print(f"writing {rmdname} to {qmdname}")

    # Only remove the sources once the .qmd has been written.
    if rmdname.is_file():
        rmdname.unlink()
    if htmlname.is_file():
        htmlname.unlink()
    for old, new in ((yamlname, newyamlname), (bibname, newbibname)):
        if old.is_file():
            # For ``_index`` files the .qmd lives one level above the
            # sidecar's destination, so that directory may not exist yet.
            new.parent.mkdir(parents=True, exist_ok=True)
            old.rename(new)
|
|
|
|
|
def main():
    """
    Convert every ``.Rmd`` under ``content/`` and move the rest alongside.

    First, each ``content/**/*.Rmd`` file is passed to ``massage_one_file``.
    Then every remaining file under ``content/`` is renamed to the same
    path with the leading ``content`` component dropped, creating parent
    directories as needed and printing one line per renamed file.

    Paths are resolved relative to the current working directory.
    """
    glb0 = "content/**/*.Rmd"
    # TODO: check for non-relative paths
    # Snapshot the matches before processing. massage_one_file deletes files
    # inside content/, and what a lazily-consumed directory scan yields
    # after such changes is unspecified.
    for fname in list(Path('').glob(glb0)):
        massage_one_file(fname)

    # move the remains per default maybe that works OK
    glb1 = "content/**/*"
    # Same reason for the snapshot: the loop renames files out of content/.
    for fname in list(Path('').glob(glb1)):
        newfname = Path(*fname.parts[1:])
        newfname.parent.mkdir(parents=True, exist_ok=True)
        if fname.is_file():
            fname.rename(newfname)
            print(f"renamed {fname} to {newfname}")
|
|
|
if __name__ == "__main__":
    # main() accepts no parameters, so forwarding sys.argv[1:] to it raised
    # TypeError as soon as any argument was given. Reject stray arguments
    # with a readable message instead of a traceback.
    if len(sys.argv) > 1:
        sys.exit(f"usage: {sys.argv[0]} (takes no arguments)")
    main()
|
``` |