Skip to content

Instantly share code, notes, and snippets.

@danmackinlay
Created October 7, 2024 04:22
Show Gist options
  • Save danmackinlay/c6f277e244de90e10b535efbe0610a0c to your computer and use it in GitHub Desktop.
Save danmackinlay/c6f277e244de90e10b535efbe0610a0c to your computer and use it in GitHub Desktop.

Migrate from blogdown to quarto

As mentioned on my blog, when migrating from blogdown to Quarto it helps to preprocess the blog posts so that you get the maximum benefit out of Quarto.

This is the script I used. Feel free to use it, modify it and so on.

(c) 2023 Dan Mackinlay

Released under the terms of the MIT license

#! /usr/bin/env python
"""
Walk the content tree and replace each .Rmd file with a processed .qmd file.
"""
from pathlib import Path
import sys
from ruamel.yaml import YAML
import re
# Module-level ruamel.yaml instance in round-trip ('rt') mode; read() parses
# front matter with it and write() dumps metadata with it.
yaml = YAML(typ='rt')
def replace_internal_links(input_string):
    """Point internal Markdown links at ``.qmd`` sources instead of ``.html``.

    Only inline links whose URL starts with ``/`` or ``./`` and ends in
    ``.html`` (optionally followed by a ``#fragment``) are rewritten, so
    absolute ``http(s)://`` links are left alone.  The fragment, if any, is
    carried over unchanged.

    The URL and fragment are not allowed to contain ``)`` or a newline.
    This keeps a match inside a single link: without that restriction the
    pattern could start at one link and run on to a later, unrelated
    ``.html)`` in the same line, rewriting text that is not a link target.

    Args:
        input_string: Markdown text to process.

    Returns:
        The text with matching link targets rewritten.
    """
    pattern = r'\[([^\]]+)\]\((\.?\/[^)\n]*?)(\.html)(#[^)\n]*)?\)'
    # \4 expands to the empty string when the link has no fragment.
    replacement = r'[\1](\2.qmd\4)'
    return re.sub(pattern, replacement, input_string)
def replace_math_delimiters(input_string):
    """Convert LaTeX-style math delimiters to dollar delimiters.

    ``\\( ... \\)`` becomes ``$...$`` when the enclosed text is on one line
    and ``$$ ... $$`` when it contains a newline.  ``\\[ ... \\]`` always
    becomes ``$$...$$``.  Inline spans are converted first, then display
    spans.

    Args:
        input_string: Text to process.

    Returns:
        The text with the math delimiters replaced.
    """
    inline_math = re.compile(r'\\\((.*?)\\\)', re.DOTALL)
    display_math = re.compile(r'\\\[(.*?)\\\]', re.DOTALL)

    def _dollarize(found):
        # An inline span that wraps over several lines is promoted to
        # display math; a single-line span stays inline.
        body = found.group(1)
        return f'$$ {body} $$' if '\n' in body else f'${body}$'

    converted = inline_math.sub(_dollarize, input_string)
    return display_math.sub(r'$$\1$$', converted)
def read(fname):
    """Read a post and split it into header metadata and body text.

    Two header styles are recognised:

    * A YAML block: the first line is ``---`` and the block is closed by a
      ``---`` or ``...`` line.  The block is parsed with the module-level
      ``yaml`` instance.  If no closing line is found, the rest of the file
      is treated as header and the body is empty.
    * Otherwise, leading ``key: value`` lines.  The header ends at the first
      line without a colon; if that line is blank it is a separator and is
      dropped, otherwise it is the first line of the body.

    Metadata keys are lower-cased in both styles.

    Args:
        fname: Path of the file to read (UTF-8).

    Returns:
        A ``(metadata, content)`` tuple: a dict of header fields and the
        body as one string.  An empty file yields ``({}, "")``.
    """
    with open(fname, 'r', encoding='utf8') as fp:
        lines = fp.readlines()
    if not lines:
        return {}, ""

    metadata = {}
    # Index of the first body line.  Defaults to "no body" so that a header
    # that never terminates (or a one-line file) cannot leave it undefined.
    body_start = len(lines)

    if lines[0].rstrip('\r\n') == '---':  # YAML header
        header = []
        for idx, line in enumerate(lines[1:], start=1):
            # Tolerate a terminator on the last line without a newline.
            if line.rstrip('\r\n') in ('---', '...'):
                body_start = idx + 1  # skip the terminator itself
                break
            header.append(line)
        # An empty header parses to None; treat it as "no metadata".
        parsed = yaml.load("".join(header)) or {}
        for key in parsed:
            metadata[key.lower()] = parsed[key]
    else:
        for idx, line in enumerate(lines):
            key, sep, value = line.partition(':')
            if not sep:
                # A blank line only separates header from body; any other
                # line is already body text and must be kept.
                body_start = idx if line.strip() else idx + 1
                break
            metadata[key.strip().lower()] = value.strip()

    return metadata, "".join(lines[body_start:])
def write(fname, metadata, content):
    """Write a post as a YAML header followed by its body.

    The header is ``metadata`` dumped with the module-level ``yaml``
    instance and fenced by ``---`` lines; ``content`` follows verbatim.

    Args:
        fname: Destination path; overwritten if it exists (UTF-8).
        metadata: Mapping to serialise as the header.
        content: Body text to append after the header.
    """
    fence = '---\n'
    with open(fname, 'w', encoding='utf8') as out:
        out.write(fence)
        yaml.dump(metadata, out)
        out.write(fence)
        out.write(content)
def massage_one_file(rmdname):
    """
    Convert one blogdown ``.Rmd`` post into a Quarto ``.qmd`` file.

    The target path is the source path with its first component (the
    content directory) dropped.  Files whose stem starts with ``_index``
    become a listing page named after their parent directory (``index.qmd``
    at the top level); under the ``tags`` section the leading directory is
    renamed to ``_tags`` and no listing block is added.

    Metadata tweaks applied before writing:

    * first entry of ``images`` is copied to ``image``
    * ``description`` is renamed to ``subtitle``
    * ``modified`` is renamed to ``date-modified``; when absent, ``date``
      is copied to ``date-modified``
    * ``tags`` is renamed to ``categories`` with underscores turned into
      spaces
    * listing pages get a ``listing`` block pointing at their directory

    The body has internal ``.html`` links and LaTeX math delimiters
    rewritten.  Afterwards the source ``.Rmd`` and any sibling ``.html`` are
    deleted, and sibling ``.yaml`` / ``.bib`` files are moved alongside the
    new location.

    Args:
        rmdname: ``pathlib.Path`` of the source file, relative and with at
            least two components (content directory plus file name).
    """
    stem = rmdname.stem
    htmlname = rmdname.with_suffix('.html')
    yamlname = rmdname.with_suffix('.yaml')
    bibname = rmdname.with_suffix('.bib')
    is_listing = False
    indexpath = None
    is_tag = rmdname.parts[1] == 'tags'

    if stem.startswith('_index'):
        # Ignore the file name; the parent directory names the new file.
        pathparts = rmdname.parts[1:-1]
        if pathparts:
            if is_tag:
                pathparts = ('_tags', *pathparts[1:])
            listing_dir = Path(*pathparts)
            # Append the extension instead of using with_suffix(), which
            # would truncate a directory name containing a dot
            # ("v1.0" -> "v1.qmd") and point the listing at the wrong place.
            qmdname = listing_dir.with_name(listing_dir.name + '.qmd')
            indexpath = listing_dir.name
        else:
            qmdname = Path('index.qmd')
            indexpath = 'index'
        is_listing = True
    else:
        # Skip the first part and reconstruct the path.
        qmdname = Path(*rmdname.parts[1:]).with_suffix('.qmd')

    newyamlname = Path(*yamlname.parts[1:])
    newbibname = Path(*bibname.parts[1:])

    metadata, rmdcontent = read(rmdname)

    # An empty `images:` key parses to None, so normalise before indexing.
    images = metadata.get('images') or []
    if images:
        metadata['image'] = images[0]
    if 'description' in metadata:
        metadata['subtitle'] = metadata.pop('description')
    if 'modified' in metadata:
        metadata['date-modified'] = metadata.pop('modified')
    elif 'date' in metadata:
        metadata['date-modified'] = metadata['date']
    if 'tags' in metadata:
        # Tolerate an empty key (None) and a single scalar tag, which would
        # otherwise crash or be split into characters.
        tags = metadata.pop('tags') or []
        if isinstance(tags, str):
            tags = [tags]
        metadata['categories'] = [str(tag).replace("_", " ") for tag in tags]
    if is_listing and not is_tag:
        metadata['listing'] = {
            'contents': indexpath,
            'feed': True,
        }

    qmdname.parent.mkdir(parents=True, exist_ok=True)
    qmdcontent = replace_internal_links(rmdcontent)
    qmdcontent = replace_math_delimiters(qmdcontent)
    write(qmdname, metadata, qmdcontent)
    print(f"writing {rmdname} to {qmdname}")

    if rmdname.is_file():
        rmdname.unlink()
    if htmlname.is_file():
        htmlname.unlink()
    for sidecar, destination in ((yamlname, newyamlname), (bibname, newbibname)):
        if sidecar.is_file():
            # For listing pages the sidecar's directory differs from the
            # .qmd's, so it may not have been created yet.
            destination.parent.mkdir(parents=True, exist_ok=True)
            sidecar.rename(destination)
def main(content_dir="content"):
    """Migrate every file under *content_dir* into the current directory.

    First every ``.Rmd`` file below *content_dir* is converted with
    ``massage_one_file``.  Then every remaining file is moved to the same
    relative path with the leading *content_dir* component removed.

    Args:
        content_dir: Name of the source directory, relative to the current
            working directory.  It must be a single path component because
            the target path is computed by dropping exactly one leading
            component.

    Raises:
        ValueError: If *content_dir* is absolute, is ``..``, or does not
            consist of exactly one path component.
    """
    root = Path(content_dir)
    if root.is_absolute() or len(root.parts) != 1 or root.name == '..':
        raise ValueError(
            f"content_dir must be a single relative directory name, got {content_dir!r}"
        )

    # Materialise each glob before touching the tree: the loop bodies delete
    # and move files inside the directories being scanned.
    for fname in list(Path('').glob(f"{root.name}/**/*.Rmd")):
        massage_one_file(fname)

    # Move whatever is left (images, data files, ...) to the same relative
    # location outside the content directory.
    for fname in list(Path('').glob(f"{root.name}/**/*")):
        newfname = Path(*fname.parts[1:])
        newfname.parent.mkdir(parents=True, exist_ok=True)
        if fname.is_file():
            fname.rename(newfname)
            print(f"renamed {fname} to {newfname}")


if __name__ == "__main__":
    # An optional first command-line argument overrides the content directory.
    main(*sys.argv[1:])
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment