A script to migrate blog content from Hugo to an Astro site that uses admonitions.
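To run it: save the file one level below the Astro project root (for example in a scripts/ directory, since the destination src/content/post and public paths are resolved relative to the script's own location), make it executable, and pass the Hugo repository as the only argument, e.g. ./migrate.py /path/to/hugo-repo (migrate.py is just a placeholder name). The uv shebang sets up an environment with PyYAML on the fly; alternatively, pass the file to uv run.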
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["pyyaml"]
# ///

# Hugo to Astro Blog Migration Script
# Migrates posts from Hugo format to Astro format with user confirmation for each post

import os
import re
import yaml
import shutil
import argparse
from pathlib import Path
from typing import Dict, List, Set, Tuple, Any, Optional

# Configuration
SCRIPT_DIR = Path(__file__).parent.absolute()

def transform_frontmatter(hugo_frontmatter: Dict[str, Any], post_path: str) -> Dict[str, Any]:
    """
    Transforms Hugo frontmatter to Astro frontmatter format
    """
    astro_frontmatter = hugo_frontmatter.copy()

    # Convert date to publishDate
    if "date" in astro_frontmatter:
        astro_frontmatter["publishDate"] = astro_frontmatter["date"]
        del astro_frontmatter["date"]

    # Transform cover image
    if "cover" in astro_frontmatter:
        cover_image = {
            "src": "",
            "alt": astro_frontmatter.get("title", "Cover image")
        }
        image_path = astro_frontmatter["cover"].get("image", "")
        # Just use the basename without ./ prefix
        cover_image["src"] = os.path.basename(image_path)
        # Add .webp extension if no extension present
        if not Path(cover_image["src"]).suffix:
            cover_image["src"] = f"{cover_image['src']}.webp"
        astro_frontmatter["coverImage"] = cover_image
        del astro_frontmatter["cover"]

    return astro_frontmatter

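# Example with hypothetical frontmatter values:
#   {"title": "My Post", "date": "2024-01-01", "cover": {"image": "images/banner"}}
# becomes
#   {"title": "My Post", "publishDate": "2024-01-01",
#    "coverImage": {"src": "banner.webp", "alt": "My Post"}}
# (only the basename of the cover path is kept, and .webp is appended because it has no extension).
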
def transform_image_references(content: str) -> str:
    """
    Transforms relative image references to add a .webp extension when it is missing
    (references starting with http(s), ./ or / are left untouched)
    """
    def replace_image(match):
        alt_text = match.group(1)
        img_path = match.group(2)
        # Extract the path without extension and the extension (if any)
        path_parts = os.path.splitext(img_path)
        basename = path_parts[0]
        extension = path_parts[1]
        # If no extension, add .webp
        if not extension:
            img_path = f"{basename}.webp"
        # Return with the original alt text and the fixed path
        return f'![{alt_text}]({img_path})'

    # Match image references not starting with http, https, ./, or /
    pattern = r'!\[([^\]]*)\]\((?!http|https|\.\/|\/)(.*?)\)'
    return re.sub(pattern, replace_image, content)

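# Example with hypothetical references:
#   ![Build output](build-output)  ->  ![Build output](build-output.webp)
#   ![Diagram](diagram.png)        ->  unchanged (already has an extension)
#   ![Logo](./logo.png), /img.png and http(s) URLs are skipped by the pattern.
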
def increase_header_nesting(content: str) -> Tuple[str, bool]:
    """
    Increases the nesting level of all headers by adding one '#' to each header,
    only if single-level headers (# ) are found.
    Returns:
        Tuple containing the transformed content and a boolean indicating if changes were made
    """
    # First check if there are any single-level headers
    has_top_level_headers = bool(re.search(r'^#[ \t]+[^\s#]', content, re.MULTILINE))
    if not has_top_level_headers:
        return content, False

    def replace_header(match):
        # Get the number of # characters and add one more
        hashes = match.group(1)
        return f'#{hashes} {match.group(2)}'

    # Match headers at start of line - capture hash symbols and content separately
    pattern = r'^(#+)[ \t]+(.*?)$'
    transformed_content = re.sub(pattern, replace_header, content, flags=re.MULTILINE)
    return transformed_content, True

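# Example: "# Intro" and "## Details" become "## Intro" and "### Details" (returns True);
# a post whose headers all start at "##" or deeper is returned unchanged (returns False).
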
def transform_admonitions(content: str) -> Tuple[str, List[str]]:
    """
    Transforms Hugo-style admonitions to Astro-style admonitions
    Converts >[!type] to :::type and removes ">" prefixes from content lines
    Logs invalid admonition types for manual review
    """
    # Define allowed admonition types
    valid_types = ['important', 'tip', 'note', 'caution', 'warning']
    invalid_admonitions_found: Set[str] = set()

    # Process the content by lines to handle admonitions
    lines = content.split('\n')
    in_admonition = False
    result: List[str] = []

    for line in lines:
        # Check for admonition start
        admonition_match = re.match(r'^>\[!(\w+)\]', line)
        if admonition_match and not in_admonition:
            admonition_type = admonition_match.group(1).lower()
            # Only transform if it's a valid admonition type
            if admonition_type in valid_types:
                in_admonition = True
                result.append(f":::{admonition_type}")
            elif admonition_type == "quote" or admonition_type == "":
                in_admonition = True
                result.append(":::tip")
            elif admonition_type == "note":
                in_admonition = True
                result.append(":::information")
            elif admonition_type == "error":
                in_admonition = True
                result.append(":::warning")
            else:
                # Track invalid admonition types for reporting
                invalid_admonitions_found.add(admonition_type)
                # Keep the line as is if it's not a valid admonition type
                result.append(f":::{admonition_type}")
                in_admonition = True
        # Check for the end of admonition (a line that doesn't start with ">")
        elif in_admonition and not line.startswith('>'):
            in_admonition = False
            result.append(':::')
            result.append(line)
        # Inside admonition content
        elif in_admonition:
            # Remove ">" prefix and trim extra whitespace
            content_line = line[1:].strip() if line.startswith('>') else line.strip()
            result.append(content_line)
        # Regular content
        else:
            result.append(line)

    # Close any open admonition at the end of the content
    if in_admonition:
        result.append(':::')

    # Return both the transformed content and any invalid types found
    return '\n'.join(result), list(invalid_admonitions_found)

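# Example with a hypothetical callout:
#   >[!note]
#   >Remember to update internal links.
# becomes
#   :::note
#   Remember to update internal links.
#   :::
# ">[!error]" maps to ":::warning" and ">[!quote]" to ":::tip"; unrecognised types are kept
# as ":::<type>" and reported for manual review.
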
def get_confirmation(message: str) -> bool:
    """
    Prompts the user for confirmation
    """
    answer = input(f"{message} (y/n): ").lower()
    return answer in ('y', 'yes')

def parse_selection(selection: str, max_index: int) -> List[int]:
    """
    Parses user selection input that can include:
    - Single numbers: "5"
    - Comma-separated values: "1,3,5"
    - Ranges: "1-4"
    - Combinations: "1-3,5,7-9"
    Returns a list of 0-based indices.
    Invalid indices or formats are ignored.
    """
    if not selection.strip():
        return []

    indices = []
    # Split by comma
    parts = selection.split(',')

    for part in parts:
        part = part.strip()
        # Check if it's a range (contains '-')
        if '-' in part:
            try:
                start, end = map(int, part.split('-', 1))
                # Convert to 0-based and ensure within bounds
                start_idx = start - 1
                end_idx = end - 1
                if start_idx < 0:
                    start_idx = 0
                if end_idx >= max_index:
                    end_idx = max_index - 1
                # Add all indices in the range
                indices.extend(range(start_idx, end_idx + 1))
            except ValueError:
                # Skip invalid range format
                continue
        else:
            # Try parsing as a single number
            try:
                idx = int(part) - 1
                if 0 <= idx < max_index:
                    indices.append(idx)
            except ValueError:
                # Skip invalid number
                continue

    # Return unique indices in ascending order
    return sorted(set(indices))

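# Example: parse_selection("1-3,5", 10) -> [0, 1, 2, 4]; parse_selection("2", 10) -> [1];
# unparseable parts such as "abc" are silently dropped.
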
def process_post(post_path: str, hugo_posts_dir: Path, astro_posts_dir: Path, hugo_static_dir: Path, public_dir: Path) -> bool:
    """
    Processes a single Hugo post and converts it to Astro format with user confirmation
    Returns:
        bool: True if processing was successful, False otherwise
    """
    full_post_path = hugo_posts_dir / post_path
    is_directory = full_post_path.is_dir()
    post_file_path = full_post_path / "index.md" if is_directory else full_post_path

    if not post_file_path.exists():
        print(f"Skipping {post_path} - no markdown file found")
        return False

    print(f"\nProcessing: {post_path}")
    post_content = post_file_path.read_text(encoding='utf-8')

    frontmatter_match = re.match(r'^---\n([\s\S]*?)\n---\n', post_content)
    if not frontmatter_match:
        print(f"Skipping {post_path} - no frontmatter found")
        return False

    hugo_frontmatter_string = frontmatter_match.group(1)
    content_without_frontmatter = re.sub(r'^---\n[\s\S]*?\n---\n', '', post_content)

    try:
        hugo_frontmatter = yaml.safe_load(hugo_frontmatter_string)
        astro_frontmatter = transform_frontmatter(hugo_frontmatter, post_path)
        content_with_images = transform_image_references(content_without_frontmatter)
        content_with_admonitions, invalid_types = transform_admonitions(content_with_images)
        transformed_content, headers_adjusted = increase_header_nesting(content_with_admonitions)

        # Log invalid admonition types if any were found
        if invalid_types:
            print(f" ⚠️ Found invalid admonition types in {post_path}: {', '.join(invalid_types)}")
            print(" Valid types are: important, tip, note, caution, warning")

        # Log if header nesting was adjusted
        if headers_adjusted:
            print(f" ℹ️ Increased header nesting levels in {post_path} (added one # to all headers)")

        astro_frontmatter_string = yaml.dump(astro_frontmatter, sort_keys=False)

        print('\n----- ORIGINAL HUGO FRONTMATTER -----')
        print(hugo_frontmatter_string)
        print('\n----- TRANSFORMED ASTRO FRONTMATTER -----')
        print(astro_frontmatter_string)

        proceed = get_confirmation('Apply these changes?')
        if not proceed:
            print(f"⏭️ Skipped {post_path} based on user choice")
            return False

        if is_directory:
            target_dir = astro_posts_dir / Path(post_path).name
        else:
            target_dir = astro_posts_dir / Path(post_path).stem

        # Check if the target post already exists
        target_post_file = target_dir / "index.md"
        if target_post_file.exists():
            overwrite = get_confirmation(f'Post already exists at {target_post_file.relative_to(Path.cwd())}. Overwrite?')
            if not overwrite:
                print(f"⏭️ Skipped {post_path} to avoid overwriting")
                return True

        target_dir.mkdir(parents=True, exist_ok=True)
        with open(target_post_file, 'w', encoding='utf-8') as f:
            f.write(f"---\n{astro_frontmatter_string}---\n{transformed_content}")
        print(f"✅ Migrated post to {target_post_file.relative_to(Path.cwd())}")

        # Copy assets
        if is_directory:
            for file_path in full_post_path.iterdir():
                if file_path.name != 'index.md' and file_path.is_file():
                    target_file_name = file_path.name
                    # If the file has no extension, assume it's an image and add .webp
                    if not file_path.suffix:
                        target_file_name = f"{file_path.name}.webp"
                    shutil.copy2(file_path, target_dir / target_file_name)
                    print(f" Copied asset: {file_path.name} -> {target_file_name}")
        else:
            # For standalone files, check for cover images
            if "cover" in hugo_frontmatter and "image" in hugo_frontmatter["cover"]:
                image_name = Path(hugo_frontmatter["cover"]["image"]).name
                # If no extension, add .webp
                if not Path(image_name).suffix:
                    image_name = f"{image_name}.webp"
                # Original image name for searching
                original_image_name = Path(hugo_frontmatter["cover"]["image"]).name
                # Try both static and public directories
                static_image_path = hugo_static_dir / original_image_name
                public_image_path = public_dir / original_image_name
                if static_image_path.exists():
                    target_path = target_dir / image_name
                    shutil.copy2(static_image_path, target_path)
                    print(f" Copied cover image: {original_image_name} -> {image_name}")
                elif public_image_path.exists():
                    target_path = target_dir / image_name
                    shutil.copy2(public_image_path, target_path)
                    print(f" Copied cover image from public: {original_image_name} -> {image_name}")
                else:
                    print(f" ⚠️ Could not find cover image: {original_image_name}")

        return True
    except Exception as error:
        print(f"❌ Error processing {post_path}: {error}")
        return False

def migrate_hugo_posts(hugo_repo_path: Path) -> None:
    """
    Main function that orchestrates the migration process
    """
    try:
        # Set up paths based on the provided Hugo repository
        hugo_posts_dir = hugo_repo_path / "content" / "posts"
        hugo_static_dir = hugo_repo_path / "static"
        # Astro paths relative to script location
        astro_posts_dir = SCRIPT_DIR.parent / "src" / "content" / "post"
        public_dir = SCRIPT_DIR.parent / "public"

        print('🚀 Starting Hugo to Astro migration...')
        print(f'Hugo posts location: {hugo_posts_dir}')
        print(f'Astro posts destination: {astro_posts_dir}')

        # Make sure required directories exist
        if not hugo_posts_dir.exists():
            raise FileNotFoundError(f"Hugo posts directory not found at {hugo_posts_dir}")
        astro_posts_dir.mkdir(parents=True, exist_ok=True)

        post_paths = [entry.name for entry in hugo_posts_dir.iterdir()]

        print('\nFound posts to migrate:')
        for index, post_path in enumerate(post_paths, 1):
            print(f"{index}. {post_path}")

        process_all = get_confirmation('\nProcess all posts?')
        processed_posts = set()

        if process_all:
            total_posts = len(post_paths)
            for index, post_path in enumerate(post_paths, 1):
                # Skip already processed posts
                if post_path in processed_posts:
                    continue
                print(f"\nProcessing {index}/{total_posts}: {post_path}")
                result = process_post(post_path, hugo_posts_dir, astro_posts_dir, hugo_static_dir, public_dir)
                if result:
                    processed_posts.add(post_path)
        else:
            while True:
                # Display remaining posts
                print('\nRemaining posts to process:')
                remaining_posts = [p for p in post_paths if p not in processed_posts]
                for index, post_path in enumerate(remaining_posts, 1):
                    print(f"{index}. {post_path}")

                if not remaining_posts:
                    print("\nAll posts have been processed!")
                    break

                selection = input('\nEnter post number(s) to process (e.g. "5", "1,3,5", "1-4", "1-3,5,7-9" or "done" to finish): ')
                if selection.lower() == 'done':
                    break

                # Parse the selection input
                indices = parse_selection(selection, len(remaining_posts))
                if not indices:
                    print('Invalid selection. Please try again.')
                    continue

                # Process all selected posts
                for index in indices:
                    post_path = remaining_posts[index]
                    print(f"\nSelected: {index + 1}. {post_path}")
                    result = process_post(post_path, hugo_posts_dir, astro_posts_dir, hugo_static_dir, public_dir)
                    if result:
                        processed_posts.add(post_path)

        print('\n✅ Migration completed!')
    except Exception as error:
        print(f'\n❌ Migration failed: {error}')

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate Hugo blog posts to Astro format")
    parser.add_argument("hugo_repo_path", help="Path to the Hugo blog repository root")
    args = parser.parse_args()

    # Convert to absolute path
    hugo_repo_path = Path(args.hugo_repo_path).absolute()
    migrate_hugo_posts(hugo_repo_path)