Skip to content

Instantly share code, notes, and snippets.

@king-11
Created May 18, 2025 02:02
Show Gist options
  • Save king-11/bb16961116c49ebc402af7efb938db35 to your computer and use it in GitHub Desktop.
Save king-11/bb16961116c49ebc402af7efb938db35 to your computer and use it in GitHub Desktop.
A script to migrate blog content from Hugo to Astro, including conversion of Hugo-style admonitions to Astro's `:::` admonition syntax.
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["pyyaml"]
# ///
# Hugo to Astro Blog Migration Script
# Migrates posts from Hugo format to Astro format with user confirmation for each post
import os
import re
import yaml
import shutil
import argparse
from pathlib import Path
from typing import Dict, List, Set, Tuple, Any, Optional
# Configuration
# Absolute path of the directory that contains this script. The Astro
# destination paths used by migrate_hugo_posts are derived from its parent.
SCRIPT_DIR = Path(__file__).parent.absolute()
def transform_frontmatter(hugo_frontmatter: Dict[str, Any], post_path: str) -> Dict[str, Any]:
    """
    Transforms Hugo frontmatter to Astro frontmatter format

    - ``date`` is renamed to ``publishDate``.
    - ``cover`` (a mapping with an ``image`` key, or a plain path string) is
      replaced by ``coverImage`` with ``src`` (basename only, ``.webp`` appended
      when the name has no extension) and ``alt`` (the post title, or
      "Cover image" when there is no title).

    The input mapping is not modified; a shallow copy is returned. ``None``
    (what YAML yields for an empty frontmatter block) is treated as an empty
    mapping. ``post_path`` is accepted for interface compatibility and is not
    used.
    """
    astro_frontmatter: Dict[str, Any] = dict(hugo_frontmatter or {})

    # Convert date to publishDate (the new key is appended at the end)
    if "date" in astro_frontmatter:
        astro_frontmatter["publishDate"] = astro_frontmatter.pop("date")

    # Transform cover image
    if "cover" in astro_frontmatter:
        cover = astro_frontmatter.pop("cover")
        if isinstance(cover, dict):
            image_path = cover.get("image") or ""
        elif isinstance(cover, str):
            image_path = cover
        else:
            # e.g. "cover:" with no value parses to None
            image_path = ""

        # Just use the basename; the image is expected next to the post
        src = os.path.basename(str(image_path))
        # Only emit coverImage when there actually is an image name;
        # otherwise we would produce a bogus ".webp" source.
        if src:
            # Add .webp extension if no extension present
            if not Path(src).suffix:
                src = f"{src}.webp"
            astro_frontmatter["coverImage"] = {
                "src": src,
                "alt": astro_frontmatter.get("title", "Cover image"),
            }

    return astro_frontmatter
def transform_image_references(content: str) -> str:
    """
    Adds a .webp extension to local Markdown image references that have none

    Only bare relative references are rewritten: destinations starting with
    ``http``, ``./`` or ``/`` are left untouched. An optional quoted link title
    (``![alt](path "title")``) is preserved and is not treated as part of the
    path. References with an empty destination are returned unchanged.
    """
    # Optional quoted title following the destination: ![alt](path "title")
    title_pattern = re.compile(r'''^(.*?)(\s+(?:"[^"]*"|'[^']*'))$''')

    def replace_image(match):
        alt_text = match.group(1)
        img_path = match.group(2)
        title = ''

        # Split off the title so it is not mistaken for a file extension
        title_match = title_pattern.match(img_path)
        if title_match:
            img_path, title = title_match.group(1), title_match.group(2)

        # If there is a path without an extension, add .webp
        if img_path and not os.path.splitext(img_path)[1]:
            img_path = f"{img_path}.webp"

        return f'![{alt_text}]({img_path}{title})'

    # Match image references not starting with http, https, ./, or /
    pattern = r'!\[([^\]]*)\]\((?!http|https|\.\/|\/)(.*?)\)'
    return re.sub(pattern, replace_image, content)
def increase_header_nesting(content: str) -> Tuple[str, bool]:
    """
    Increases the nesting level of all headers by adding one '#' to each header
    only if single-level headers (# ) are found.

    Lines inside fenced code blocks (``` or ~~~) are ignored both when looking
    for top-level headers and when rewriting, so '#' comments in code samples
    are neither mistaken for headers nor modified. An unclosed fence extends to
    the end of the content.

    Rewritten headers are normalised to a single space after the hashes.

    Returns:
    Tuple containing the transformed content and a boolean indicating if changes were made
    """
    fence_pattern = re.compile(r'^ {0,3}(`{3,}|~{3,})')
    top_level_pattern = re.compile(r'^#[ \t]+[^\s#]')
    header_pattern = re.compile(r'^(#+)[ \t]+(.*?)$')

    lines = content.split('\n')

    # Mark every line that belongs to a fenced code block (fences included)
    is_code: List[bool] = []
    open_fence = None  # marker that opened the current block, or None outside
    for line in lines:
        fence_match = fence_pattern.match(line)
        if open_fence is None:
            if fence_match:
                open_fence = fence_match.group(1)
                is_code.append(True)
            else:
                is_code.append(False)
        else:
            is_code.append(True)
            # A closing fence uses the same character, is at least as long as
            # the opening one and carries nothing else on the line.
            if (
                fence_match
                and fence_match.group(1)[0] == open_fence[0]
                and len(fence_match.group(1)) >= len(open_fence)
                and line.strip() == fence_match.group(1)
            ):
                open_fence = None

    # First check if there are any single-level headers outside code blocks
    has_top_level_headers = any(
        not code and top_level_pattern.match(line)
        for line, code in zip(lines, is_code)
    )
    if not has_top_level_headers:
        return content, False

    transformed_lines: List[str] = []
    for line, code in zip(lines, is_code):
        header_match = None if code else header_pattern.match(line)
        if header_match:
            # Keep the existing hashes and add one more
            transformed_lines.append(f'#{header_match.group(1)} {header_match.group(2)}')
        else:
            transformed_lines.append(line)

    return '\n'.join(transformed_lines), True
def transform_admonitions(content: str) -> Tuple[str, List[str]]:
    """
    Transforms Hugo-style admonitions to Astro-style admonitions

    A blockquote whose first line is ``>[!type]`` (optionally with whitespace
    after ``>``) becomes a ``:::type`` ... ``:::`` block, and the ``>`` prefix
    (plus at most one following space) is removed from its content lines so
    that indentation of nested lists or code is kept.

    Type handling (case-insensitive):
    - important, tip, note, caution, warning are kept as they are
    - quote is mapped to tip, error is mapped to warning
    - any other type is still emitted as ``:::type`` but reported for review

    Text following the marker on the same line is kept as the first content
    line of the block rather than being discarded.

    Returns:
    Tuple containing the transformed content and the sorted list of
    unrecognised admonition types that were encountered
    """
    # Define allowed admonition types and the types remapped to an allowed one
    valid_types = {'important', 'tip', 'note', 'caution', 'warning'}
    type_aliases = {'quote': 'tip', 'error': 'warning'}
    # ">[!type]" with optional whitespace after ">", an optional fold marker
    # (+/-) after "]" and optional trailing text
    start_pattern = re.compile(r'^>[ \t]*\[!(\w+)\][+-]?[ \t]*(.*)$')

    invalid_admonitions_found: Set[str] = set()
    in_admonition = False
    result: List[str] = []

    for line in content.split('\n'):
        # A marker line only opens a block when we are not already inside one
        start_match = None if in_admonition else start_pattern.match(line)

        if start_match:
            admonition_type = start_match.group(1).lower()
            if admonition_type in type_aliases:
                directive = type_aliases[admonition_type]
            else:
                directive = admonition_type
                if admonition_type not in valid_types:
                    # Track invalid admonition types for reporting
                    invalid_admonitions_found.add(admonition_type)
            result.append(f":::{directive}")

            trailing_text = start_match.group(2).strip()
            if trailing_text:
                result.append(trailing_text)
            in_admonition = True
        # End of admonition: a line that doesn't start with ">"
        elif in_admonition and not line.startswith('>'):
            in_admonition = False
            result.append(':::')
            result.append(line)
        # Inside admonition content
        elif in_admonition:
            # Drop ">" and the single separating space, keep other indentation
            content_line = line[1:]
            if content_line.startswith(' '):
                content_line = content_line[1:]
            result.append(content_line)
        # Regular content
        else:
            result.append(line)

    # Close any open admonition at the end of the content
    if in_admonition:
        result.append(':::')

    # Sorted so the report is stable from run to run
    return '\n'.join(result), sorted(invalid_admonitions_found)
def get_confirmation(message: str) -> bool:
    """
    Prompts the user for confirmation

    The prompt is ``message`` followed by " (y/n): ". Returns True only for
    "y" or "yes" (case-insensitive, surrounding whitespace ignored); any other
    answer, including an empty one, counts as "no".
    """
    answer = input(f"{message} (y/n): ").strip().lower()
    return answer in ('y', 'yes')
def parse_selection(selection: str, max_index: int) -> List[int]:
    """
    Turns a 1-based selection string into a sorted list of unique 0-based indices.

    Accepted pieces, separated by commas:
    - a single number: "5"
    - an inclusive range: "1-4"
    - any mix of the two: "1-3,5,7-9"

    Ranges are clamped to the available items (0 .. max_index - 1); single
    numbers outside that span and pieces that cannot be parsed are dropped.
    """
    if selection.strip() == "":
        return []

    chosen = set()
    for token in (piece.strip() for piece in selection.split(',')):
        if '-' in token:
            # Range: everything before the first '-' is the start, the rest the end
            low_text, _, high_text = token.partition('-')
            try:
                low = int(low_text)
                high = int(high_text)
            except ValueError:
                # Malformed range, ignore it
                continue
            # Shift to 0-based and clamp both ends to the valid span
            first = max(low - 1, 0)
            last = min(high - 1, max_index - 1)
            chosen.update(range(first, last + 1))
        else:
            try:
                position = int(token) - 1
            except ValueError:
                # Not a number, ignore it
                continue
            if 0 <= position < max_index:
                chosen.add(position)

    return sorted(chosen)
def _display_path(path: Path) -> str:
    """Returns path relative to the current directory when possible, otherwise unchanged."""
    try:
        return str(path.relative_to(Path.cwd()))
    except ValueError:
        # The path is not below the working directory (script run from elsewhere)
        return str(path)


def process_post(post_path: str, hugo_posts_dir: Path, astro_posts_dir: Path, hugo_static_dir: Path, public_dir: Path) -> bool:
    """
    Processes a single Hugo post and converts it to Astro format with user confirmation

    ``post_path`` is an entry name inside ``hugo_posts_dir``: either a
    directory containing ``index.md`` or a standalone markdown file. The
    post is written to ``astro_posts_dir/<name>/index.md``. For a directory
    post every sibling file is copied alongside it; for a standalone file the
    cover image named in the frontmatter is looked up in ``hugo_static_dir``
    and ``public_dir``. Files without an extension get ``.webp`` appended.

    Returns:
    bool: True if the post was migrated, or if the user declined to overwrite
    an existing target (so it is not offered again); False if it was skipped
    or an error occurred
    """
    full_post_path = hugo_posts_dir / post_path
    is_directory = full_post_path.is_dir()
    post_file_path = full_post_path / "index.md" if is_directory else full_post_path
    if not post_file_path.exists():
        print(f"Skipping {post_path} - no markdown file found")
        return False

    print(f"\nProcessing: {post_path}")
    try:
        # Reading happens inside the try so that one unreadable file (for
        # example a binary file in the posts folder) only fails this post.
        post_content = post_file_path.read_text(encoding='utf-8')
        frontmatter_match = re.match(r'^---\n([\s\S]*?)\n---\n', post_content)
        if not frontmatter_match:
            print(f"Skipping {post_path} - no frontmatter found")
            return False
        hugo_frontmatter_string = frontmatter_match.group(1)
        content_without_frontmatter = post_content[frontmatter_match.end():]

        hugo_frontmatter = yaml.safe_load(hugo_frontmatter_string)
        astro_frontmatter = transform_frontmatter(hugo_frontmatter, post_path)
        content_with_images = transform_image_references(content_without_frontmatter)
        content_with_admonitions, invalid_types = transform_admonitions(content_with_images)
        transformed_content, headers_adjusted = increase_header_nesting(content_with_admonitions)

        # Log invalid admonition types if any were found
        if invalid_types:
            print(f" ⚠️ Found invalid admonition types in {post_path}: {', '.join(invalid_types)}")
            print(" Valid types are: important, tip, note, caution, warning")
        # Log if header nesting was adjusted
        if headers_adjusted:
            print(f" ℹ️ Increased header nesting levels in {post_path} (added one # to all headers)")

        astro_frontmatter_string = yaml.dump(astro_frontmatter, sort_keys=False)
        print('\n----- ORIGINAL HUGO FRONTMATTER -----')
        print(hugo_frontmatter_string)
        print('\n----- TRANSFORMED ASTRO FRONTMATTER -----')
        print(astro_frontmatter_string)

        proceed = get_confirmation('Apply these changes?')
        if not proceed:
            print(f"⏭️ Skipped {post_path} based on user choice")
            return False

        if is_directory:
            target_dir = astro_posts_dir / Path(post_path).name
        else:
            target_dir = astro_posts_dir / Path(post_path).stem

        # Check if the target post already exists
        target_post_file = target_dir / "index.md"
        if target_post_file.exists():
            overwrite = get_confirmation(f'Post already exists at {_display_path(target_post_file)}. Overwrite?')
            if not overwrite:
                print(f"⏭️ Skipped {post_path} to avoid overwriting")
                return True

        target_dir.mkdir(parents=True, exist_ok=True)
        with open(target_post_file, 'w', encoding='utf-8') as f:
            f.write(f"---\n{astro_frontmatter_string}---\n{transformed_content}")
        print(f"✅ Migrated post to {_display_path(target_post_file)}")

        # Copy assets
        if is_directory:
            for file_path in full_post_path.iterdir():
                if file_path.name != 'index.md' and file_path.is_file():
                    target_file_name = file_path.name
                    # If the file has no extension, assume it's an image and add .webp
                    if not file_path.suffix:
                        target_file_name = f"{file_path.name}.webp"
                    shutil.copy2(file_path, target_dir / target_file_name)
                    print(f" Copied asset: {file_path.name} -> {target_file_name}")
        else:
            # For standalone files, check for cover images
            cover = hugo_frontmatter.get("cover") if isinstance(hugo_frontmatter, dict) else None
            cover_image = cover.get("image") if isinstance(cover, dict) else None
            if cover_image:
                # Original image name for searching
                original_image_name = Path(str(cover_image)).name
                image_name = original_image_name
                # If no extension, add .webp
                if not Path(image_name).suffix:
                    image_name = f"{image_name}.webp"

                # Try the static root, then the public directory, and finally
                # the path as written in the frontmatter below static/ (Hugo
                # serves static/ from the site root).
                static_image_path = hugo_static_dir / original_image_name
                public_image_path = public_dir / original_image_name
                nested_static_path = hugo_static_dir / str(cover_image).lstrip('/')
                candidates = [
                    (static_image_path, "Copied cover image"),
                    (public_image_path, "Copied cover image from public"),
                ]
                if nested_static_path != static_image_path:
                    candidates.append((nested_static_path, "Copied cover image"))

                for source_path, label in candidates:
                    if source_path.exists():
                        shutil.copy2(source_path, target_dir / image_name)
                        print(f" {label}: {original_image_name} -> {image_name}")
                        break
                else:
                    print(f" ⚠️ Could not find cover image: {original_image_name}")
        return True
    except Exception as error:
        print(f"❌ Error processing {post_path}: {error}")
        return False
def migrate_hugo_posts(hugo_repo_path: Path) -> None:
    """
    Main function that orchestrates the migration process

    Lists the entries of ``<hugo_repo_path>/content/posts`` in sorted order
    (hidden entries starting with "." are ignored), then either processes all
    of them or lets the user repeatedly pick entries by number until "done" is
    entered or nothing remains. Posts are written below
    ``src/content/post`` next to the script's parent directory.

    Any error is reported on stdout; nothing is raised to the caller.
    """
    try:
        # Set up paths based on the provided Hugo repository
        hugo_posts_dir = hugo_repo_path / "content" / "posts"
        hugo_static_dir = hugo_repo_path / "static"
        # Astro paths relative to script location
        astro_posts_dir = SCRIPT_DIR.parent / "src" / "content" / "post"
        public_dir = SCRIPT_DIR.parent / "public"

        print('🚀 Starting Hugo to Astro migration...')
        print(f'Hugo posts location: {hugo_posts_dir}')
        print(f'Astro posts destination: {astro_posts_dir}')

        # Make sure required directories exist
        if not hugo_posts_dir.exists():
            raise FileNotFoundError(f"Hugo posts directory not found at {hugo_posts_dir}")
        astro_posts_dir.mkdir(parents=True, exist_ok=True)

        # Sorted for a stable numbering; dotfiles such as .DS_Store are not posts
        post_paths = sorted(
            entry.name
            for entry in hugo_posts_dir.iterdir()
            if not entry.name.startswith('.')
        )
        if not post_paths:
            print('\nNo posts found to migrate.')
            return

        print('\nFound posts to migrate:')
        for index, post_path in enumerate(post_paths, 1):
            print(f"{index}. {post_path}")

        process_all = get_confirmation('\nProcess all posts?')
        processed_posts = set()

        if process_all:
            total_posts = len(post_paths)
            for index, post_path in enumerate(post_paths, 1):
                print(f"\nProcessing {index}/{total_posts}: {post_path}")
                if process_post(post_path, hugo_posts_dir, astro_posts_dir, hugo_static_dir, public_dir):
                    processed_posts.add(post_path)
        else:
            while True:
                remaining_posts = [p for p in post_paths if p not in processed_posts]
                if not remaining_posts:
                    print("\nAll posts have been processed!")
                    break

                # Display remaining posts
                print('\nRemaining posts to process:')
                for index, post_path in enumerate(remaining_posts, 1):
                    print(f"{index}. {post_path}")

                selection = input('\nEnter post number(s) to process (e.g. "5", "1,3,5", "1-4", "1-3,5,7-9" or "done" to finish): ')
                if selection.strip().lower() == 'done':
                    break

                # Parse the selection input
                indices = parse_selection(selection, len(remaining_posts))
                if not indices:
                    print('Invalid selection. Please try again.')
                    continue

                # Process all selected posts; posts that were skipped or failed
                # stay in the list and can be selected again
                for index in indices:
                    post_path = remaining_posts[index]
                    print(f"\nSelected: {index + 1}. {post_path}")
                    if process_post(post_path, hugo_posts_dir, astro_posts_dir, hugo_static_dir, public_dir):
                        processed_posts.add(post_path)

        print('\n✅ Migration completed!')
    except Exception as error:
        print(f'\n❌ Migration failed: {error}')
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the Hugo repository root
    cli = argparse.ArgumentParser(description="Migrate Hugo blog posts to Astro format")
    cli.add_argument("hugo_repo_path", help="Path to the Hugo blog repository root")
    parsed = cli.parse_args()
    # The path is made absolute before the migration starts
    migrate_hugo_posts(Path(parsed.hugo_repo_path).absolute())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment