danmackinlay · December 29, 2024 07:59
diff --git a/quarto_thumbnail.py b/quarto_thumbnail.py
 #!/usr/bin/env python
 r"""
 Update doc metadata by parsing HTML version for images, generating thumbnails of the first admissible image, and updating the metadata with the thumbnail URL.
 """

 from pathlib import Path
 import subprocess
 import argparse
 import logging
 from ruamel.yaml import YAML
 from bs4 import BeautifulSoup
 import os
 import re

 # Module-level logger named after this module. Handlers and level are
 # installed later by configure_logging(), which calls logging.basicConfig().
 logger = logging.getLogger(__name__)


 def configure_logging(log_level: str):
    """
    Set up root logging at the requested verbosity.

    Args:
        log_level (str): Name of a level defined by the ``logging`` module,
            matched case-insensitively (e.g. ``"debug"``, ``"WARNING"``).
            A name that does not map to an integer level is reported with a
            warning and INFO is used instead.
    """
    looked_up = getattr(logging, log_level.upper(), None)
    if isinstance(looked_up, int):
        chosen = looked_up
    else:
        logger.warning(f"Invalid log level: {log_level}. Defaulting to INFO.")
        chosen = logging.INFO

    logging.basicConfig(format='%(levelname)s: %(message)s', level=chosen)


 # Width and height handed to `vips thumbnail` by create_thumbnail().
 THUMB_WIDTH = 320
 THUMB_HEIGHT = 320

 # Shared ruamel.yaml instance in round-trip ('rt') mode; read() loads
 # front matter with it and write() dumps with it.
 yaml = YAML(typ='rt')


 def read(fname):
    """
    Read a Quarto/Pandoc markdown file and split it into metadata and body.

    Two header styles are recognised:

    * a YAML block opened by ``---`` on the first line and closed by
      ``---`` or ``...`` (if no closing line exists the whole file is
      treated as header);
    * otherwise, a leading run of ``key: value`` lines, optionally followed
      by one blank separator line.

    String metadata keys are lower-cased in both cases.

    Args:
        fname (Path): Path to the `.qmd` file.

    Returns:
        tuple: ``(metadata, lines)`` where ``metadata`` is a dict and
        ``lines`` is the list of body lines with their newlines kept.
        An empty file yields ``({}, [])``; so does any error, after a
        warning has been logged.
    """
    metadata = {}
    try:
        with open(fname, 'r', encoding='utf8') as fp:
            lines = fp.readlines()

        if not lines:
            return {}, []

        if lines[0].strip() == '---':  # YAML header
            # Index of the closing fence; defaults to "past the end" so an
            # unterminated header swallows the whole file, as before.
            end = len(lines)
            for i, line in enumerate(lines[1:], start=1):
                if line.strip() in ('---', '...'):
                    end = i
                    break

            # An empty header parses to None. Treat that as "no metadata"
            # rather than failing and throwing the body away.
            parsed = yaml.load("".join(lines[1:end])) or {}
            for k, v in parsed.items():
                metadata[k.lower() if isinstance(k, str) else k] = v

            return metadata, lines[end + 1:]

        # No YAML fence: consume leading "key: value" lines.
        body_start = len(lines)
        for i, line in enumerate(lines):
            name, sep, value = line.partition(':')
            if not sep:
                # A blank line merely separates header from body and is
                # skipped; any other line is already body text and is kept.
                body_start = i if line.strip() else i + 1
                break
            metadata[name.lower()] = value.strip()

        return metadata, lines[body_start:]
    except Exception as e:
        # Best effort: callers receive the empty result instead of an error.
        logging.warning(f"Failed to read {fname}: {str(e)}")
        return {}, []


 def write(fname, metadata, content):
    """
    Write YAML front matter followed by the body back to a `.qmd` file.

    The complete document is serialised in memory before the target is
    opened, so a failure while dumping the metadata cannot leave a
    truncated file behind. Errors are logged as warnings, not re-raised.

    Args:
        fname (Path): Path to the `.qmd` file.
        metadata (dict): Metadata to write.
        content (str): Markdown content to write.
    """
    import io  # local import: only this function needs it

    try:
        buffer = io.StringIO()
        buffer.write('---\n')
        yaml.dump(metadata, buffer)
        buffer.write('---\n')
        buffer.write(content)

        # Only now touch the file: opening with 'w' truncates it.
        with open(fname, 'w', encoding='utf8') as fp:
            fp.write(buffer.getvalue())
        logging.info(f"Successfully wrote updated metadata to {fname}")
    except Exception as e:
        logging.warning(f"Failed to write {fname}: {str(e)}")


 def get_file_mod_time(path):
    """
    Return a file's last-modification timestamp.

    Args:
        path (Path): File to inspect.

    Returns:
        float: Seconds since the epoch, or 0.0 when the lookup raises
        OSError (for example because the file does not exist).
    """
    try:
        mtime = os.path.getmtime(path)
    except OSError:
        # Unreadable or missing files compare as older than everything else.
        mtime = 0.0
    return mtime


 def create_thumbnail(image_path, thumbnail_path):
    """
    Create a thumbnail of the given image by running `vips thumbnail`.

    Args:
        image_path (Path): Path to the original image.
        thumbnail_path (Path): Path where the thumbnail will be saved; its
            parent directory is created when missing.

    Returns:
        bool: True if the command succeeded, False on any failure (which is
        logged as a warning).
    """
    try:
        # Accept plain strings as well as Path objects.
        thumbnail_path = Path(thumbnail_path)

        # Ensure the parent directory exists
        thumbnail_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            "vips",
            "thumbnail",
            str(image_path),
            str(thumbnail_path),
            str(THUMB_WIDTH),
            "--height", str(THUMB_HEIGHT),
            "--size", "down",  # never upscale
            "--crop", "attention",  # Crop to the interesting bit
        ]

        # Output is captured only so that stderr can be reported on failure.
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        logging.info(f"Thumbnail created successfully at {thumbnail_path}")
        return True
    except subprocess.CalledProcessError as e:
        # Decode leniently: an exception raised inside this handler would
        # escape the function instead of being turned into False.
        detail = (e.stderr or b"").decode(errors="replace").strip()
        logging.warning(f"Failed to create/update thumbnail for {image_path}: {detail}")
        return False
    except Exception as e:
        logging.warning(f"An error occurred while creating thumbnail for {image_path}: {str(e)}")
        return False


 def extract_thumbnail_url_html(htmlcontent):
    """
    Return the `src` of the first admissible `<img>` in rendered HTML.

    An image is admissible when its `src` is non-empty and does not start
    with 'http' or 'data:', it has at least one ancestor `<div>` with a
    class containing 'figure' or 'illustration', and none of those
    ancestors carries the class 'foreign' or 'sep'.

    Args:
        htmlcontent (str): The HTML content as a string.

    Returns:
        str or None: The image URL if found, else None (also when the
        document has no body).
    """
    document = BeautifulSoup(htmlcontent, 'html.parser')
    if not document.body:
        return None

    def looks_like_figure(value):
        # Same predicate the class filter applies to each candidate value.
        return value and ('figure' in value or 'illustration' in value)

    for img in document.find_all('img'):
        src = img.get('src')
        if not src or src.startswith(('http', 'data:')):
            logging.debug(f"Skipping image with URL: {src}")
            continue

        ancestors = img.find_parents('div', class_=looks_like_figure)
        if not ancestors:
            logging.debug(f"No parent Div with 'figure' or 'illustration' classes found for image: {src}")
            continue

        # Reject the image as soon as one matching ancestor is excluded.
        for div in ancestors:
            classes = div.get('class', [])
            if 'foreign' in classes or 'sep' in classes:
                logging.debug(f"Skipping image '{src}' inside Div with classes: {classes}")
                break
        else:
            logging.debug(f"Selected image for thumbnail: {src}")
            return src

    return None


 def extract_thumbnail_url_md(content_lines):
    """
    Find the first image inside a `.figure` Div of a Quarto/Pandoc document.

    An image qualifies when at least one enclosing fenced Div has class
    `.figure` and no enclosing Div has class `.foreign`. Fences follow
    Pandoc's fenced-Div rules: an opening line is three or more colons
    followed by either `{attributes}` or a single bare class word, with
    optional trailing colons; a closing line is three or more colons alone.
    Nested Divs are tracked, so an inner closing fence does not end the
    outer Div.

    Args:
        content_lines (list): List of lines from the markdown content.

    Returns:
        str or None: The text inside the parentheses of the first
        qualifying `![alt](...)` link, else None.
    """
    # e.g. :::{#fig-pews .figure .illustration .right}, "::: {.figure}", "::: warning"
    div_start_pattern = re.compile(r'^:{3,}\s*(?:\{([^}]*)\}|([^\s{}:]+))\s*:*\s*$')
    div_end_pattern = re.compile(r'^:{3,}\s*$')
    image_pattern = re.compile(r'!\[.*?\]\(([^)]+)\)')

    def parse_div_classes(match):
        """Return the set of class names declared on an opening fence."""
        attrs_str, bare_word = match.group(1), match.group(2)
        if attrs_str is None:
            return {bare_word}
        return {piece[1:] for piece in attrs_str.split() if piece.startswith('.')}

    # Class sets of the Divs currently open, outermost first.
    open_divs = []

    for idx, line in enumerate(content_lines, start=1):
        m = div_start_pattern.match(line)
        if m:
            classes = parse_div_classes(m)
            open_divs.append(classes)
            logging.debug(f"Entering DIV block at line {idx} with classes: {classes}")
            continue

        if div_end_pattern.match(line):
            # A stray closing fence outside any Div is ignored.
            if open_divs:
                classes = open_divs.pop()
                logging.debug(f"Exiting DIV block at line {idx} with classes: {classes}")
            continue

        if not open_divs:
            continue

        img_match = image_pattern.search(line)
        if not img_match:
            continue

        in_figure = any("figure" in classes for classes in open_divs)
        in_foreign = any("foreign" in classes for classes in open_divs)
        if in_figure and not in_foreign:
            img_path = img_match.group(1)
            logging.debug(f"Selected image for thumbnail from markdown: {img_path}")
            return img_path

    return None


 def resolve_image_path(img_url, fname, site_root=Path('.')):
    """
    Map an image URL to a resolved filesystem path.

    Args:
        img_url (str): The image URL from the markdown. A leading '/' means
            "relative to the site root"; anything else is taken relative to
            the directory of `fname`.
        fname (Path): Path to the `.qmd` file.
        site_root (Path): The root directory of the site.

    Returns:
        Path: The resolved filesystem path to the image.
    """
    if not img_url.startswith('/'):
        # Document-relative URL: anchor it at the `.qmd` file's directory.
        candidate = fname.parent / img_url
        logging.debug(f"Resolved relative image URL '{img_url}' to filesystem path '{candidate}'")
        return candidate.resolve()

    # Site-absolute URL: drop the leading slashes and anchor at the site root.
    candidate = site_root / img_url.lstrip('/')
    logging.debug(f"Resolved absolute image URL '{img_url}' to filesystem path '{candidate}'")
    return candidate.resolve()


 def resolve_thumbnail_path(thumbnail_url, fname, site_root=Path('.')):
    """
    Map a thumbnail URL to a resolved filesystem path.

    Args:
        thumbnail_url (str): The thumbnail URL to be used in metadata. A
            leading '/' means "relative to the site root"; anything else is
            taken relative to the directory of `fname`.
        fname (Path): Path to the `.qmd` file.
        site_root (Path): The root directory of the site.

    Returns:
        Path: The resolved filesystem path to the thumbnail.
    """
    if not thumbnail_url.startswith('/'):
        # Document-relative URL: anchor it at the `.qmd` file's directory.
        candidate = fname.parent / thumbnail_url
        logging.debug(f"Resolved relative thumbnail URL '{thumbnail_url}' to filesystem path '{candidate}'")
        return candidate.resolve()

    # Site-absolute URL: drop the leading slashes and anchor at the site root.
    candidate = site_root / thumbnail_url.lstrip('/')
    logging.debug(f"Resolved absolute thumbnail URL '{thumbnail_url}' to filesystem path '{candidate}'")
    return candidate.resolve()


 def massage_one_file(fname, site_root=Path('.')):
    """
    Process one `.qmd` file: pick its first admissible image, make sure a
    thumbnail exists for it, and record the thumbnail in the `image` metadata.

    The image is taken from the rendered HTML under `<site_root>/_site/`
    when that file is newer than the `.qmd`, and from the markdown source
    otherwise. The `.qmd` is rewritten only when its `image` value changes;
    this also happens when the thumbnail already exists (e.g. because
    another document shares the same image).

    Args:
        fname (Path): Path to the `.qmd` file.
        site_root (Path): The root directory of the site.
    """
    qmdname = fname.with_suffix('.qmd')
    htmlname = site_root / "_site" / fname.with_suffix('.html')

    metadata, qmd_lines = read(qmdname)
    if not metadata and not qmd_lines:
        # read() reports a failure (and an empty file) as ({}, []).
        # Rewriting from that would replace the document with bare front
        # matter, so leave the file alone.
        logging.warning(f"Nothing could be read from {qmdname}; leaving it untouched.")
        return

    # Determine which file to prefer based on modification times
    use_html = False
    if htmlname.exists():
        if get_file_mod_time(htmlname) > get_file_mod_time(qmdname):
            use_html = True
            logging.debug(f"HTML file {htmlname} is newer than QMD file {qmdname}. Using HTML for image extraction.")
        else:
            logging.debug(f"HTML file {htmlname} is not newer than QMD file {qmdname}. Using QMD for image extraction.")
    else:
        logging.debug(f"HTML file {htmlname} does not exist. Using QMD for image extraction.")

    if use_html:
        try:
            with open(htmlname, 'r', encoding='utf8') as html_fp:
                html_content = html_fp.read()
            img_url = extract_thumbnail_url_html(html_content)
        except Exception as e:
            # Fall back to the markdown source when the HTML is unusable.
            logging.warning(f"Failed to read HTML file {htmlname}: {str(e)}")
            img_url = extract_thumbnail_url_md(qmd_lines)
    else:
        img_url = extract_thumbnail_url_md(qmd_lines)

    if img_url is None:
        logging.warning(f"Could not find image URL for {qmdname}")
        return

    # Thumbnail URL has the extension changed to .thumbnail.avif. Excluding
    # '/' keeps the match inside the last path component.
    thumbnail_url = re.sub(r'\.[^./]+$', '.thumbnail.avif', img_url)

    # Resolve filesystem paths
    img_path = resolve_image_path(img_url, fname, site_root)
    thumbnail_path = resolve_thumbnail_path(thumbnail_url, fname, site_root)

    logging.debug(f"Image path: {img_path}")
    logging.debug(f"Thumbnail path: {thumbnail_path}")

    if thumbnail_path == img_path:
        # No extension was replaced, so the thumbnail would be written over
        # the source image itself.
        logging.warning(f"Cannot derive a thumbnail name for {img_url} in {qmdname}; skipping.")
        return

    if get_file_mod_time(thumbnail_path) > get_file_mod_time(img_path):
        logging.info(f"Thumbnail for {img_url} is up to date.")
    elif not create_thumbnail(img_path, thumbnail_path):
        logging.warning(f"Failed to create thumbnail for {img_url} in {qmdname}")
        return

    if metadata.get('image') == thumbnail_url:
        return

    metadata['image'] = thumbnail_url
    logging.info(f"{qmdname} metadata updated to include thumbnail {thumbnail_url}.")
    write(qmdname, metadata, "".join(qmd_lines))

    if htmlname.exists():
        # The rendered page is an output artifact: prepending YAML front
        # matter to it would corrupt the HTML. Only refresh its timestamp.
        # NOTE(review): presumably the point of touching it is to keep the
        # HTML newer than the just-rewritten source so the same extraction
        # path is used on the next run; confirm.
        try:
            os.utime(htmlname)
        except OSError as e:
            logging.warning(f"Failed to update HTML file {htmlname}: {str(e)}")


 def process_files(files: list):
    """
    Process the requested `.qmd` files, or discover them when none are given.

    Args:
        files (list): Paths to process. When empty, the current directory is
            searched recursively for `*.qmd` files whose names do not start
            with '_'.
    """
    targets = []

    if not files:
        ## Nothing was requested explicitly: fall back to the default glob
        logger.info(
            "No files provided. Using default glob pattern '**/*.qmd' excluding files starting with '_'."
        )
        targets.extend(
            found for found in Path(".").rglob("*.qmd")
            if not found.name.startswith("_")
        )
    else:
        for requested in files:
            candidate = Path(requested)
            if not candidate.is_file():
                logger.warning(f"File not found or is not a file: {requested}")
                continue
            targets.append(candidate)

    if not targets:
        logger.warning("No .qmd files found to process.")
        return

    for target in targets:
        # Re-checked here because globbed entries were never tested.
        if not target.is_file():
            continue
        logging.info(f"Processing file: {target}")
        massage_one_file(target)


 def parse_arguments():
    """
    Build the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace: `files` holds the positional paths (an empty
        list when none are given) and `log_level` one of the accepted
        level names, defaulting to "INFO".
    """
    cli = argparse.ArgumentParser(
        description="Update image thumbnails in Quarto/Pandoc markdown files."
    )

    files_help = "List of files to process. If omitted, all '*.qmd' files (excluding those starting with '_') will be processed."
    cli.add_argument("files", nargs="*", help=files_help)

    level_names = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    cli.add_argument(
        "--log-level",
        type=str,
        default="INFO",
        choices=level_names,
        help="Set the logging level (default: INFO).",
    )

    return cli.parse_args()


 def main():
    """
    Command-line entry point: parse arguments, configure logging, then
    process the files unless the ON_GITHUB environment variable is truthy.
    """
    options = parse_arguments()
    configure_logging(options.log_level)

    # ON_GITHUB counts as set when it is '1', 'true' or 'yes' (any case).
    on_github = os.environ.get('ON_GITHUB', '').lower() in ('1', 'true', 'yes')
    if on_github:
        logger.info("Running on GitHub Actions. We cannot process images here, so lets assume nothing to do")
    else:
        process_files(files=options.files)


 # Run the CLI only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	r"""
	Update doc metadata by parsing HTML version for images, generating thumbnails of the first admissible image, and updating the metadata with the thumbnail URL.
	"""

	from pathlib import Path
	import subprocess
	import argparse
	import logging
	from ruamel.yaml import YAML
	from bs4 import BeautifulSoup
	import os
	import re

	# Module-level logger named after this module. Handlers and level are
	# installed later by configure_logging(), which calls logging.basicConfig().
	logger = logging.getLogger(__name__)


	def configure_logging(log_level: str):
	    """
	    Set up root logging at the requested verbosity.

	    Args:
	        log_level (str): Name of a level defined by the ``logging`` module,
	            matched case-insensitively (e.g. ``"debug"``, ``"WARNING"``).
	            A name that does not map to an integer level is reported with a
	            warning and INFO is used instead.
	    """
	    looked_up = getattr(logging, log_level.upper(), None)
	    if isinstance(looked_up, int):
	        chosen = looked_up
	    else:
	        logger.warning(f"Invalid log level: {log_level}. Defaulting to INFO.")
	        chosen = logging.INFO

	    logging.basicConfig(format='%(levelname)s: %(message)s', level=chosen)


	# Width and height handed to `vips thumbnail` by create_thumbnail().
	THUMB_WIDTH = 320
	THUMB_HEIGHT = 320

	# Shared ruamel.yaml instance in round-trip ('rt') mode; read() loads
	# front matter with it and write() dumps with it.
	yaml = YAML(typ='rt')


	def read(fname):
	    """
	    Read a Quarto/Pandoc markdown file and split it into metadata and body.

	    Two header styles are recognised:

	    * a YAML block opened by ``---`` on the first line and closed by
	      ``---`` or ``...`` (if no closing line exists the whole file is
	      treated as header);
	    * otherwise, a leading run of ``key: value`` lines, optionally followed
	      by one blank separator line.

	    String metadata keys are lower-cased in both cases.

	    Args:
	        fname (Path): Path to the `.qmd` file.

	    Returns:
	        tuple: ``(metadata, lines)`` where ``metadata`` is a dict and
	        ``lines`` is the list of body lines with their newlines kept.
	        An empty file yields ``({}, [])``; so does any error, after a
	        warning has been logged.
	    """
	    metadata = {}
	    try:
	        with open(fname, 'r', encoding='utf8') as fp:
	            lines = fp.readlines()

	        if not lines:
	            return {}, []

	        if lines[0].strip() == '---':  # YAML header
	            # Index of the closing fence; defaults to "past the end" so an
	            # unterminated header swallows the whole file.
	            end = len(lines)
	            for i, line in enumerate(lines[1:], start=1):
	                if line.strip() in ('---', '...'):
	                    end = i
	                    break

	            # An empty header parses to None. Treat that as "no metadata"
	            # rather than failing and throwing the body away.
	            parsed = yaml.load("".join(lines[1:end])) or {}
	            for k, v in parsed.items():
	                metadata[k.lower() if isinstance(k, str) else k] = v

	            return metadata, lines[end + 1:]

	        # No YAML fence: consume leading "key: value" lines.
	        body_start = len(lines)
	        for i, line in enumerate(lines):
	            name, sep, value = line.partition(':')
	            if not sep:
	                # A blank line merely separates header from body and is
	                # skipped; any other line is already body text and is kept.
	                body_start = i if line.strip() else i + 1
	                break
	            metadata[name.lower()] = value.strip()

	        return metadata, lines[body_start:]
	    except Exception as e:
	        # Best effort: callers receive the empty result instead of an error.
	        logging.warning(f"Failed to read {fname}: {str(e)}")
	        return {}, []


	def write(fname, metadata, content):
	    """
	    Write YAML front matter followed by the body back to a `.qmd` file.

	    The complete document is serialised in memory before the target is
	    opened, so a failure while dumping the metadata cannot leave a
	    truncated file behind. Errors are logged as warnings, not re-raised.

	    Args:
	        fname (Path): Path to the `.qmd` file.
	        metadata (dict): Metadata to write.
	        content (str): Markdown content to write.
	    """
	    import io  # local import: only this function needs it

	    try:
	        buffer = io.StringIO()
	        buffer.write('---\n')
	        yaml.dump(metadata, buffer)
	        buffer.write('---\n')
	        buffer.write(content)

	        # Only now touch the file: opening with 'w' truncates it.
	        with open(fname, 'w', encoding='utf8') as fp:
	            fp.write(buffer.getvalue())
	        logging.info(f"Successfully wrote updated metadata to {fname}")
	    except Exception as e:
	        logging.warning(f"Failed to write {fname}: {str(e)}")


	def get_file_mod_time(path):
	    """
	    Return a file's last-modification timestamp.

	    Args:
	        path (Path): File to inspect.

	    Returns:
	        float: Seconds since the epoch, or 0.0 when the lookup raises
	        OSError (for example because the file does not exist).
	    """
	    try:
	        mtime = os.path.getmtime(path)
	    except OSError:
	        # Unreadable or missing files compare as older than everything else.
	        mtime = 0.0
	    return mtime


	def create_thumbnail(image_path, thumbnail_path):
	    """
	    Create a thumbnail of the given image by running `vips thumbnail`.

	    Args:
	        image_path (Path): Path to the original image.
	        thumbnail_path (Path): Path where the thumbnail will be saved; its
	            parent directory is created when missing.

	    Returns:
	        bool: True if the command succeeded, False on any failure (which is
	        logged as a warning).
	    """
	    try:
	        # Accept plain strings as well as Path objects.
	        thumbnail_path = Path(thumbnail_path)

	        # Ensure the parent directory exists
	        thumbnail_path.parent.mkdir(parents=True, exist_ok=True)

	        cmd = [
	            "vips",
	            "thumbnail",
	            str(image_path),
	            str(thumbnail_path),
	            str(THUMB_WIDTH),
	            "--height", str(THUMB_HEIGHT),
	            "--size", "down",  # never upscale
	            "--crop", "attention",  # Crop to the interesting bit
	        ]

	        # Output is captured only so that stderr can be reported on failure.
	        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

	        logging.info(f"Thumbnail created successfully at {thumbnail_path}")
	        return True
	    except subprocess.CalledProcessError as e:
	        # Decode leniently: an exception raised inside this handler would
	        # escape the function instead of being turned into False.
	        detail = (e.stderr or b"").decode(errors="replace").strip()
	        logging.warning(f"Failed to create/update thumbnail for {image_path}: {detail}")
	        return False
	    except Exception as e:
	        logging.warning(f"An error occurred while creating thumbnail for {image_path}: {str(e)}")
	        return False


	def extract_thumbnail_url_html(htmlcontent):
	    """
	    Return the `src` of the first admissible `<img>` in rendered HTML.

	    An image is admissible when its `src` is non-empty and does not start
	    with 'http' or 'data:', it has at least one ancestor `<div>` with a
	    class containing 'figure' or 'illustration', and none of those
	    ancestors carries the class 'foreign' or 'sep'.

	    Args:
	        htmlcontent (str): The HTML content as a string.

	    Returns:
	        str or None: The image URL if found, else None (also when the
	        document has no body).
	    """
	    document = BeautifulSoup(htmlcontent, 'html.parser')
	    if not document.body:
	        return None

	    def looks_like_figure(value):
	        # Predicate the class filter applies to each candidate value.
	        return value and ('figure' in value or 'illustration' in value)

	    for img in document.find_all('img'):
	        src = img.get('src')
	        if not src or src.startswith(('http', 'data:')):
	            logging.debug(f"Skipping image with URL: {src}")
	            continue

	        ancestors = img.find_parents('div', class_=looks_like_figure)
	        if not ancestors:
	            logging.debug(f"No parent Div with 'figure' or 'illustration' classes found for image: {src}")
	            continue

	        # Reject the image as soon as one matching ancestor is excluded.
	        for div in ancestors:
	            classes = div.get('class', [])
	            if 'foreign' in classes or 'sep' in classes:
	                logging.debug(f"Skipping image '{src}' inside Div with classes: {classes}")
	                break
	        else:
	            logging.debug(f"Selected image for thumbnail: {src}")
	            return src

	    return None


	def extract_thumbnail_url_md(content_lines):
	    """
	    Find the first image inside a `.figure` Div of a Quarto/Pandoc document.

	    An image qualifies when at least one enclosing fenced Div has class
	    `.figure` and no enclosing Div has class `.foreign`. Fences follow
	    Pandoc's fenced-Div rules: an opening line is three or more colons
	    followed by either `{attributes}` or a single bare class word, with
	    optional trailing colons; a closing line is three or more colons alone.
	    Nested Divs are tracked, so an inner closing fence does not end the
	    outer Div.

	    Args:
	        content_lines (list): List of lines from the markdown content.

	    Returns:
	        str or None: The text inside the parentheses of the first
	        qualifying `![alt](...)` link, else None.
	    """
	    # e.g. :::{#fig-pews .figure .illustration .right}, "::: {.figure}", "::: warning"
	    div_start_pattern = re.compile(r'^:{3,}\s*(?:\{([^}]*)\}|([^\s{}:]+))\s*:*\s*$')
	    div_end_pattern = re.compile(r'^:{3,}\s*$')
	    image_pattern = re.compile(r'!\[.*?\]\(([^)]+)\)')

	    def parse_div_classes(match):
	        """Return the set of class names declared on an opening fence."""
	        attrs_str, bare_word = match.group(1), match.group(2)
	        if attrs_str is None:
	            return {bare_word}
	        return {piece[1:] for piece in attrs_str.split() if piece.startswith('.')}

	    # Class sets of the Divs currently open, outermost first.
	    open_divs = []

	    for idx, line in enumerate(content_lines, start=1):
	        m = div_start_pattern.match(line)
	        if m:
	            classes = parse_div_classes(m)
	            open_divs.append(classes)
	            logging.debug(f"Entering DIV block at line {idx} with classes: {classes}")
	            continue

	        if div_end_pattern.match(line):
	            # A stray closing fence outside any Div is ignored.
	            if open_divs:
	                classes = open_divs.pop()
	                logging.debug(f"Exiting DIV block at line {idx} with classes: {classes}")
	            continue

	        if not open_divs:
	            continue

	        img_match = image_pattern.search(line)
	        if not img_match:
	            continue

	        in_figure = any("figure" in classes for classes in open_divs)
	        in_foreign = any("foreign" in classes for classes in open_divs)
	        if in_figure and not in_foreign:
	            img_path = img_match.group(1)
	            logging.debug(f"Selected image for thumbnail from markdown: {img_path}")
	            return img_path

	    return None


	def resolve_image_path(img_url, fname, site_root=Path('.')):
	    """
	    Map an image URL to a resolved filesystem path.

	    Args:
	        img_url (str): The image URL from the markdown. A leading '/' means
	            "relative to the site root"; anything else is taken relative to
	            the directory of `fname`.
	        fname (Path): Path to the `.qmd` file.
	        site_root (Path): The root directory of the site.

	    Returns:
	        Path: The resolved filesystem path to the image.
	    """
	    if not img_url.startswith('/'):
	        # Document-relative URL: anchor it at the `.qmd` file's directory.
	        candidate = fname.parent / img_url
	        logging.debug(f"Resolved relative image URL '{img_url}' to filesystem path '{candidate}'")
	        return candidate.resolve()

	    # Site-absolute URL: drop the leading slashes and anchor at the site root.
	    candidate = site_root / img_url.lstrip('/')
	    logging.debug(f"Resolved absolute image URL '{img_url}' to filesystem path '{candidate}'")
	    return candidate.resolve()


	def resolve_thumbnail_path(thumbnail_url, fname, site_root=Path('.')):
	    """
	    Map a thumbnail URL to a resolved filesystem path.

	    Args:
	        thumbnail_url (str): The thumbnail URL to be used in metadata. A
	            leading '/' means "relative to the site root"; anything else is
	            taken relative to the directory of `fname`.
	        fname (Path): Path to the `.qmd` file.
	        site_root (Path): The root directory of the site.

	    Returns:
	        Path: The resolved filesystem path to the thumbnail.
	    """
	    if not thumbnail_url.startswith('/'):
	        # Document-relative URL: anchor it at the `.qmd` file's directory.
	        candidate = fname.parent / thumbnail_url
	        logging.debug(f"Resolved relative thumbnail URL '{thumbnail_url}' to filesystem path '{candidate}'")
	        return candidate.resolve()

	    # Site-absolute URL: drop the leading slashes and anchor at the site root.
	    candidate = site_root / thumbnail_url.lstrip('/')
	    logging.debug(f"Resolved absolute thumbnail URL '{thumbnail_url}' to filesystem path '{candidate}'")
	    return candidate.resolve()


	def massage_one_file(fname, site_root=Path('.')):
	    """
	    Process one `.qmd` file: pick its first admissible image, make sure a
	    thumbnail exists for it, and record the thumbnail in the `image` metadata.

	    The image is taken from the rendered HTML under `<site_root>/_site/`
	    when that file is newer than the `.qmd`, and from the markdown source
	    otherwise. The `.qmd` is rewritten only when its `image` value changes;
	    this also happens when the thumbnail already exists (e.g. because
	    another document shares the same image).

	    Args:
	        fname (Path): Path to the `.qmd` file.
	        site_root (Path): The root directory of the site.
	    """
	    qmdname = fname.with_suffix('.qmd')
	    htmlname = site_root / "_site" / fname.with_suffix('.html')

	    metadata, qmd_lines = read(qmdname)
	    if not metadata and not qmd_lines:
	        # read() reports a failure (and an empty file) as ({}, []).
	        # Rewriting from that would replace the document with bare front
	        # matter, so leave the file alone.
	        logging.warning(f"Nothing could be read from {qmdname}; leaving it untouched.")
	        return

	    # Determine which file to prefer based on modification times
	    use_html = False
	    if htmlname.exists():
	        if get_file_mod_time(htmlname) > get_file_mod_time(qmdname):
	            use_html = True
	            logging.debug(f"HTML file {htmlname} is newer than QMD file {qmdname}. Using HTML for image extraction.")
	        else:
	            logging.debug(f"HTML file {htmlname} is not newer than QMD file {qmdname}. Using QMD for image extraction.")
	    else:
	        logging.debug(f"HTML file {htmlname} does not exist. Using QMD for image extraction.")

	    if use_html:
	        try:
	            with open(htmlname, 'r', encoding='utf8') as html_fp:
	                html_content = html_fp.read()
	            img_url = extract_thumbnail_url_html(html_content)
	        except Exception as e:
	            # Fall back to the markdown source when the HTML is unusable.
	            logging.warning(f"Failed to read HTML file {htmlname}: {str(e)}")
	            img_url = extract_thumbnail_url_md(qmd_lines)
	    else:
	        img_url = extract_thumbnail_url_md(qmd_lines)

	    if img_url is None:
	        logging.warning(f"Could not find image URL for {qmdname}")
	        return

	    # Thumbnail URL has the extension changed to .thumbnail.avif. Excluding
	    # '/' keeps the match inside the last path component.
	    thumbnail_url = re.sub(r'\.[^./]+$', '.thumbnail.avif', img_url)

	    # Resolve filesystem paths
	    img_path = resolve_image_path(img_url, fname, site_root)
	    thumbnail_path = resolve_thumbnail_path(thumbnail_url, fname, site_root)

	    logging.debug(f"Image path: {img_path}")
	    logging.debug(f"Thumbnail path: {thumbnail_path}")

	    if thumbnail_path == img_path:
	        # No extension was replaced, so the thumbnail would be written over
	        # the source image itself.
	        logging.warning(f"Cannot derive a thumbnail name for {img_url} in {qmdname}; skipping.")
	        return

	    if get_file_mod_time(thumbnail_path) > get_file_mod_time(img_path):
	        logging.info(f"Thumbnail for {img_url} is up to date.")
	    elif not create_thumbnail(img_path, thumbnail_path):
	        logging.warning(f"Failed to create thumbnail for {img_url} in {qmdname}")
	        return

	    if metadata.get('image') == thumbnail_url:
	        return

	    metadata['image'] = thumbnail_url
	    logging.info(f"{qmdname} metadata updated to include thumbnail {thumbnail_url}.")
	    write(qmdname, metadata, "".join(qmd_lines))

	    if htmlname.exists():
	        # The rendered page is an output artifact: prepending YAML front
	        # matter to it would corrupt the HTML. Only refresh its timestamp.
	        # NOTE(review): presumably the point of touching it is to keep the
	        # HTML newer than the just-rewritten source so the same extraction
	        # path is used on the next run; confirm.
	        try:
	            os.utime(htmlname)
	        except OSError as e:
	            logging.warning(f"Failed to update HTML file {htmlname}: {str(e)}")


	def process_files(files: list):
	    """
	    Process the requested `.qmd` files, or discover them when none are given.

	    Args:
	        files (list): Paths to process. When empty, the current directory is
	            searched recursively for `*.qmd` files whose names do not start
	            with '_'.
	    """
	    targets = []

	    if not files:
	        ## Nothing was requested explicitly: fall back to the default glob
	        logger.info(
	            "No files provided. Using default glob pattern '**/*.qmd' excluding files starting with '_'."
	        )
	        targets.extend(
	            found for found in Path(".").rglob("*.qmd")
	            if not found.name.startswith("_")
	        )
	    else:
	        for requested in files:
	            candidate = Path(requested)
	            if not candidate.is_file():
	                logger.warning(f"File not found or is not a file: {requested}")
	                continue
	            targets.append(candidate)

	    if not targets:
	        logger.warning("No .qmd files found to process.")
	        return

	    for target in targets:
	        # Re-checked here because globbed entries were never tested.
	        if not target.is_file():
	            continue
	        logging.info(f"Processing file: {target}")
	        massage_one_file(target)


	def parse_arguments():
	    """
	    Build the command-line interface and parse the process arguments.

	    Returns:
	        argparse.Namespace: `files` holds the positional paths (an empty
	        list when none are given) and `log_level` one of the accepted
	        level names, defaulting to "INFO".
	    """
	    cli = argparse.ArgumentParser(
	        description="Update image thumbnails in Quarto/Pandoc markdown files."
	    )

	    files_help = "List of files to process. If omitted, all '*.qmd' files (excluding those starting with '_') will be processed."
	    cli.add_argument("files", nargs="*", help=files_help)

	    level_names = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
	    cli.add_argument(
	        "--log-level",
	        type=str,
	        default="INFO",
	        choices=level_names,
	        help="Set the logging level (default: INFO).",
	    )

	    return cli.parse_args()


	def main():
	    """
	    Command-line entry point: parse arguments, configure logging, then
	    process the files unless the ON_GITHUB environment variable is truthy.
	    """
	    options = parse_arguments()
	    configure_logging(options.log_level)

	    # ON_GITHUB counts as set when it is '1', 'true' or 'yes' (any case).
	    on_github = os.environ.get('ON_GITHUB', '').lower() in ('1', 'true', 'yes')
	    if on_github:
	        logger.info("Running on GitHub Actions. We cannot process images here, so lets assume nothing to do")
	    else:
	        process_files(files=options.files)


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()