danmackinlay · October 7, 2024 04:22
diff --git a/README.md b/README.md
diff --git a/blogdown_to_quarto.py b/blogdown_to_quarto.py
 #! /usr/bin/env python
 """
 walk tree, replace .Rmd with processed .qmd
 """
 from pathlib import Path
 import sys
 from ruamel.yaml import YAML
 import re

 # Shared round-trip ('rt') YAML handler; read() loads front matter with it
 # and write() dumps the converted metadata with it.
 yaml = YAML(typ='rt')


 def replace_internal_links(input_string):
     """
     Rewrite internal markdown links that point at ``.html`` pages so that
     they point at the matching ``.qmd`` file instead.

     A link counts as internal when its target starts with ``/`` or ``./``.
     An optional ``#fragment`` following ``.html`` is carried over unchanged.
     Everything else in ``input_string`` is returned as-is.
     """
     # Neither the target nor the fragment may contain ')' or a newline, so a
     # match can never run past the end of its own link and rewrite unrelated
     # text that merely happens to end in ".html)".
     pattern = r'\[([^\]]+)\]\((\.?/[^)\n]*?)\.html(#[^)\n]*)?\)'
     # An unmatched optional fragment group expands to the empty string.
     replacement = r'[\1](\2.qmd\3)'
     return re.sub(pattern, replacement, input_string)


 def replace_math_delimiters(input_string):
     r"""
     Convert backslash math delimiters to dollar-sign delimiters.

     ``\( ... \)`` becomes ``$...$`` when the enclosed text has no newline
     and ``$$ ... $$`` when it does; ``\[ ... \]`` always becomes
     ``$$...$$``. Both delimiters may span several lines.
     """
     inline_math = re.compile(r'\\\((.*?)\\\)', re.DOTALL)
     display_math = re.compile(r'\\\[(.*?)\\\]', re.DOTALL)

     def dollarize(found):
         body = found.group(1)
         # Inline delimiters wrapping several lines are promoted to display math.
         return f'$$ {body} $$' if '\n' in body else f'${body}$'

     converted = inline_math.sub(dollarize, input_string)
     return display_math.sub(lambda found: f'$${found.group(1)}$$', converted)

 def read(fname):
     """
     Read a source file and split it into header metadata and body text.

     Two header styles are recognised:

     * a YAML block opened by ``---`` on the first line and closed by
       ``---`` or ``...``, parsed with the module-level ``yaml`` handler;
     * otherwise a run of leading ``key: value`` lines, ended by the first
       line that contains no colon.

     Metadata keys are lower-cased in both cases.

     Returns a ``(metadata, content)`` tuple: ``metadata`` is a plain dict
     and ``content`` is the text after the header. An empty file yields
     ``({}, "")``.
     """
     with open(fname, 'r', encoding='utf8') as fp:
         lines = fp.readlines()

     if not lines:
         return {}, ""

     metadata = {}
     if lines[0].rstrip('\n') == '---':  # YAML header
         # Index of the closing fence; an unterminated header swallows the
         # whole file and leaves no body.
         end = next(
             (
                 idx
                 for idx, line in enumerate(lines[1:], start=1)
                 if line.rstrip('\n') in ('---', '...')
             ),
             len(lines),
         )
         # An empty header loads as None, so fall back to an empty mapping.
         parsed = yaml.load("".join(lines[1:end])) or {}
         for key, value in parsed.items():
             metadata[key.lower()] = value
         body_start = end + 1
     else:
         # If every line looks like "key: value" there is no body at all.
         body_start = len(lines)
         for idx, line in enumerate(lines):
             name, colon, value = line.partition(':')
             if not colon:
                 # A blank separator line is dropped; any other line is
                 # already body text and must be kept.
                 body_start = idx if line.strip() else idx + 1
                 break
             metadata[name.strip().lower()] = value.strip()

     return metadata, "".join(lines[body_start:])


 def write(fname, metadata, content):
     """
     Write ``metadata`` as a ``---``-fenced YAML header followed by
     ``content`` to ``fname`` (UTF-8, overwriting any existing file).
     """
     fence = '---\n'
     with open(fname, 'w', encoding='utf8') as out:
         out.write(fence)
         # The module-level YAML handler serialises straight into the stream.
         yaml.dump(metadata, out)
         out.write(fence)
         out.write(content)


 def massage_one_file(rmdname):
     """
     Convert one blogdown ``.Rmd`` file into a Quarto ``.qmd`` file.

     ``rmdname`` is a relative ``Path`` whose first component is the content
     root; that component is dropped to form the output path. A file whose
     stem starts with ``_index`` becomes a page named after its parent
     directory (``index.qmd`` when it sits directly in the root, and under
     ``_tags`` instead of ``tags`` for tag pages); unless it is a tag page it
     also receives a ``listing`` entry.

     Metadata tweaks: the first entry of ``images`` is copied to ``image``,
     ``description`` is renamed to ``subtitle``, ``modified`` (or, failing
     that, ``date``) supplies ``date-modified``, and ``tags`` become
     ``categories`` with underscores replaced by spaces.

     Side effects: writes the ``.qmd``, deletes the source ``.Rmd`` and any
     ``.html`` beside it, and moves sibling ``.yaml``/``.bib`` files out of
     the content root.
     """
     htmlname = rmdname.with_suffix('.html')
     yamlname = rmdname.with_suffix('.yaml')
     bibname = rmdname.with_suffix('.bib')
     is_tag = rmdname.parts[1] == 'tags'
     is_listing = rmdname.stem.startswith('_index')

     if is_listing:
         # ignore the file name, use the parent dirname as the base of a new file
         pathparts = rmdname.parts[1:-1]
         if pathparts:
             if is_tag:
                 pathparts = ('_tags', *pathparts[1:])
             base = Path(*pathparts)
             # Append instead of with_suffix(): a directory named "v1.2"
             # must become "v1.2.qmd", not "v1.qmd".
             qmdname = base.with_name(base.name + '.qmd')
         else:
             qmdname = Path('index.qmd')
         indexpath = qmdname.stem
     else:
         # Skip the first part and reconstruct the path
         qmdname = Path(*rmdname.parts[1:]).with_suffix('.qmd')

     metadata, rmdcontent = read(rmdname)

     # "or []" tolerates keys that are present but empty (loaded as None).
     images = metadata.get('images') or []
     if images:
         metadata['image'] = images[0]
     if 'description' in metadata:
         metadata['subtitle'] = metadata.pop('description')
     if 'modified' in metadata:
         metadata['date-modified'] = metadata.pop('modified')
     elif 'date' in metadata:
         metadata['date-modified'] = metadata['date']
     if 'tags' in metadata:
         tags = metadata.pop('tags') or []
         if isinstance(tags, str):
             # A single scalar tag would otherwise be split into characters.
             tags = [tags]
         metadata['categories'] = [tag.replace("_", " ") for tag in tags]
     if is_listing and not is_tag:
         metadata['listing'] = {
             'contents': indexpath,
             'feed': True,
         }

     qmdname.parent.mkdir(parents=True, exist_ok=True)
     qmdcontent = replace_math_delimiters(replace_internal_links(rmdcontent))
     write(qmdname, metadata, qmdcontent)
     print(f"writing {rmdname} to {qmdname}")

     for stale in (rmdname, htmlname):
         if stale.is_file():
             stale.unlink()
     for sidecar in (yamlname, bibname):
         if sidecar.is_file():
             target = Path(*sidecar.parts[1:])
             # For _index files the sidecar keeps its directory, which is
             # not qmdname.parent and may not exist yet.
             target.parent.mkdir(parents=True, exist_ok=True)
             sidecar.rename(target)


 def main():
     """
     Convert the ``content/`` tree under the current directory in place.

     Every ``.Rmd`` file below ``content/`` is passed to
     ``massage_one_file``; every file still left below ``content/`` is then
     moved to the same relative path with the leading ``content`` component
     removed. Directories themselves are not moved or deleted.
     """
     root = Path('')
     # TODO: check for non-relative paths
     # Snapshot each glob before touching the tree: both loops delete or move
     # files inside the directories a lazy glob would still be walking.
     for fname in sorted(root.glob("content/**/*.Rmd")):
         massage_one_file(fname)
     # move the remains per default maybe that works OK
     for fname in sorted(root.glob("content/**/*")):
         newfname = Path(*fname.parts[1:])
         newfname.parent.mkdir(parents=True, exist_ok=True)
         if fname.is_file():
             fname.rename(newfname)
             print(f"renamed {fname} to {newfname}")

 if __name__ == "__main__":
     # main() takes no parameters, so forwarding sys.argv to it could only
     # raise a TypeError; refuse stray arguments with a clear message instead.
     if len(sys.argv) > 1:
         sys.exit(f"usage: {sys.argv[0]} (no arguments; run from the site root)")
     main()
 ```
	#! /usr/bin/env python
	"""
	walk tree, replace .Rmd with processed .qmd
	"""
	from pathlib import Path
	import sys
	from ruamel.yaml import YAML
	import re

	# Shared round-trip ('rt') YAML handler; read() loads front matter with it
	# and write() dumps the converted metadata with it.
	yaml = YAML(typ='rt')


	def replace_internal_links(input_string):
	    """
	    Rewrite internal markdown links that point at ``.html`` pages so that
	    they point at the matching ``.qmd`` file instead.

	    A link counts as internal when its target starts with ``/`` or ``./``.
	    An optional ``#fragment`` following ``.html`` is carried over unchanged.
	    Everything else in ``input_string`` is returned as-is.
	    """
	    # Neither the target nor the fragment may contain ')' or a newline, so a
	    # match can never run past the end of its own link and rewrite unrelated
	    # text that merely happens to end in ".html)".
	    pattern = r'\[([^\]]+)\]\((\.?/[^)\n]*?)\.html(#[^)\n]*)?\)'
	    # An unmatched optional fragment group expands to the empty string.
	    replacement = r'[\1](\2.qmd\3)'
	    return re.sub(pattern, replacement, input_string)


	def replace_math_delimiters(input_string):
	    r"""
	    Convert backslash math delimiters to dollar-sign delimiters.

	    ``\( ... \)`` becomes ``$...$`` when the enclosed text has no newline
	    and ``$$ ... $$`` when it does; ``\[ ... \]`` always becomes
	    ``$$...$$``. Both delimiters may span several lines.
	    """
	    inline_math = re.compile(r'\\\((.*?)\\\)', re.DOTALL)
	    display_math = re.compile(r'\\\[(.*?)\\\]', re.DOTALL)

	    def dollarize(found):
	        body = found.group(1)
	        # Inline delimiters wrapping several lines are promoted to display math.
	        return f'$$ {body} $$' if '\n' in body else f'${body}$'

	    converted = inline_math.sub(dollarize, input_string)
	    return display_math.sub(lambda found: f'$${found.group(1)}$$', converted)

	def read(fname):
	    """
	    Read a source file and split it into header metadata and body text.

	    Two header styles are recognised:

	    * a YAML block opened by ``---`` on the first line and closed by
	      ``---`` or ``...``, parsed with the module-level ``yaml`` handler;
	    * otherwise a run of leading ``key: value`` lines, ended by the first
	      line that contains no colon.

	    Metadata keys are lower-cased in both cases.

	    Returns a ``(metadata, content)`` tuple: ``metadata`` is a plain dict
	    and ``content`` is the text after the header. An empty file yields
	    ``({}, "")``.
	    """
	    with open(fname, 'r', encoding='utf8') as fp:
	        lines = fp.readlines()

	    if not lines:
	        return {}, ""

	    metadata = {}
	    if lines[0].rstrip('\n') == '---':  # YAML header
	        # Index of the closing fence; an unterminated header swallows the
	        # whole file and leaves no body.
	        end = next(
	            (
	                idx
	                for idx, line in enumerate(lines[1:], start=1)
	                if line.rstrip('\n') in ('---', '...')
	            ),
	            len(lines),
	        )
	        # An empty header loads as None, so fall back to an empty mapping.
	        parsed = yaml.load("".join(lines[1:end])) or {}
	        for key, value in parsed.items():
	            metadata[key.lower()] = value
	        body_start = end + 1
	    else:
	        # If every line looks like "key: value" there is no body at all.
	        body_start = len(lines)
	        for idx, line in enumerate(lines):
	            name, colon, value = line.partition(':')
	            if not colon:
	                # A blank separator line is dropped; any other line is
	                # already body text and must be kept.
	                body_start = idx if line.strip() else idx + 1
	                break
	            metadata[name.strip().lower()] = value.strip()

	    return metadata, "".join(lines[body_start:])


	def write(fname, metadata, content):
	    """
	    Write ``metadata`` as a ``---``-fenced YAML header followed by
	    ``content`` to ``fname`` (UTF-8, overwriting any existing file).
	    """
	    fence = '---\n'
	    with open(fname, 'w', encoding='utf8') as out:
	        out.write(fence)
	        # The module-level YAML handler serialises straight into the stream.
	        yaml.dump(metadata, out)
	        out.write(fence)
	        out.write(content)


	def massage_one_file(rmdname):
	    """
	    Convert one blogdown ``.Rmd`` file into a Quarto ``.qmd`` file.

	    ``rmdname`` is a relative ``Path`` whose first component is the content
	    root; that component is dropped to form the output path. A file whose
	    stem starts with ``_index`` becomes a page named after its parent
	    directory (``index.qmd`` when it sits directly in the root, and under
	    ``_tags`` instead of ``tags`` for tag pages); unless it is a tag page it
	    also receives a ``listing`` entry.

	    Metadata tweaks: the first entry of ``images`` is copied to ``image``,
	    ``description`` is renamed to ``subtitle``, ``modified`` (or, failing
	    that, ``date``) supplies ``date-modified``, and ``tags`` become
	    ``categories`` with underscores replaced by spaces.

	    Side effects: writes the ``.qmd``, deletes the source ``.Rmd`` and any
	    ``.html`` beside it, and moves sibling ``.yaml``/``.bib`` files out of
	    the content root.
	    """
	    htmlname = rmdname.with_suffix('.html')
	    yamlname = rmdname.with_suffix('.yaml')
	    bibname = rmdname.with_suffix('.bib')
	    is_tag = rmdname.parts[1] == 'tags'
	    is_listing = rmdname.stem.startswith('_index')

	    if is_listing:
	        # ignore the file name, use the parent dirname as the base of a new file
	        pathparts = rmdname.parts[1:-1]
	        if pathparts:
	            if is_tag:
	                pathparts = ('_tags', *pathparts[1:])
	            base = Path(*pathparts)
	            # Append instead of with_suffix(): a directory named "v1.2"
	            # must become "v1.2.qmd", not "v1.qmd".
	            qmdname = base.with_name(base.name + '.qmd')
	        else:
	            qmdname = Path('index.qmd')
	        indexpath = qmdname.stem
	    else:
	        # Skip the first part and reconstruct the path
	        qmdname = Path(*rmdname.parts[1:]).with_suffix('.qmd')

	    metadata, rmdcontent = read(rmdname)

	    # "or []" tolerates keys that are present but empty (loaded as None).
	    images = metadata.get('images') or []
	    if images:
	        metadata['image'] = images[0]
	    if 'description' in metadata:
	        metadata['subtitle'] = metadata.pop('description')
	    if 'modified' in metadata:
	        metadata['date-modified'] = metadata.pop('modified')
	    elif 'date' in metadata:
	        metadata['date-modified'] = metadata['date']
	    if 'tags' in metadata:
	        tags = metadata.pop('tags') or []
	        if isinstance(tags, str):
	            # A single scalar tag would otherwise be split into characters.
	            tags = [tags]
	        metadata['categories'] = [tag.replace("_", " ") for tag in tags]
	    if is_listing and not is_tag:
	        metadata['listing'] = {
	            'contents': indexpath,
	            'feed': True,
	        }

	    qmdname.parent.mkdir(parents=True, exist_ok=True)
	    qmdcontent = replace_math_delimiters(replace_internal_links(rmdcontent))
	    write(qmdname, metadata, qmdcontent)
	    print(f"writing {rmdname} to {qmdname}")

	    for stale in (rmdname, htmlname):
	        if stale.is_file():
	            stale.unlink()
	    for sidecar in (yamlname, bibname):
	        if sidecar.is_file():
	            target = Path(*sidecar.parts[1:])
	            # For _index files the sidecar keeps its directory, which is
	            # not qmdname.parent and may not exist yet.
	            target.parent.mkdir(parents=True, exist_ok=True)
	            sidecar.rename(target)


	def main():
	    """
	    Convert the ``content/`` tree under the current directory in place.

	    Every ``.Rmd`` file below ``content/`` is passed to
	    ``massage_one_file``; every file still left below ``content/`` is then
	    moved to the same relative path with the leading ``content`` component
	    removed. Directories themselves are not moved or deleted.
	    """
	    root = Path('')
	    # TODO: check for non-relative paths
	    # Snapshot each glob before touching the tree: both loops delete or move
	    # files inside the directories a lazy glob would still be walking.
	    for fname in sorted(root.glob("content/**/*.Rmd")):
	        massage_one_file(fname)
	    # move the remains per default maybe that works OK
	    for fname in sorted(root.glob("content/**/*")):
	        newfname = Path(*fname.parts[1:])
	        newfname.parent.mkdir(parents=True, exist_ok=True)
	        if fname.is_file():
	            fname.rename(newfname)
	            print(f"renamed {fname} to {newfname}")

	if __name__ == "__main__":
	    # main() takes no parameters, so forwarding sys.argv to it could only
	    # raise a TypeError; refuse stray arguments with a clear message instead.
	    if len(sys.argv) > 1:
	        sys.exit(f"usage: {sys.argv[0]} (no arguments; run from the site root)")
	    main()
	```