Last active
May 20, 2025 10:15
-
-
Save dstreefkerk/06af5de795ca39f8cc8ae8eb38251e2b to your computer and use it in GitHub Desktop.
Quick-and-dirty script that migrates posts from a WordPress.com XML export to Jekyll Markdown, then checks and updates them using Claude Code.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
WordPress to Jekyll Migration Script using Claude Code CLI | |
This script: | |
1. Takes a WordPress export XML file path as input | |
2. Lists all matching posts based on runtime parameters | |
3. Allows the user to select which posts to migrate | |
4. Extracts posts from the WordPress export | |
5. Uses Claude CLI to convert each post to Jekyll format (or simulates conversion with --dry-run) | |
6. Runs a quality check on each converted post using Claude CLI (or simulates with --dry-run) | |
7. Saves the final Jekyll posts in the specified output directory (or simulates saving with --dry-run) | |
Usage: | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --dry-run | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --prompt-file jekyll-migration-prompt.md | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --limit 5 | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --categories technology,programming --tags windows,scripting | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --timeout 600 | |
python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --skip-quality-checks | |
""" | |
import argparse | |
import json | |
import os | |
import re | |
import subprocess | |
import sys | |
import time | |
import xml.etree.ElementTree as ET | |
from datetime import datetime | |
from pathlib import Path | |
from typing import Dict, List, Optional, Tuple, Set | |
# XML namespace prefixes used in WordPress export XML. The mapping is passed to
# ElementTree find()/findall() so prefixed paths such as "wp:post_type" resolve.
NAMESPACES = {
    "wp": "http://wordpress.org/export/1.2/",
    "content": "http://purl.org/rss/1.0/modules/content/",
    "excerpt": "http://wordpress.org/export/1.2/excerpt/",
    "dc": "http://purl.org/dc/elements/1.1/",
}
# Default Claude prompt for the conversion step (worded to explicitly forbid
# wrapper text, code fences and explanations in the reply). The WordPress post
# data is appended to this text before it is sent to Claude.
DEFAULT_CONVERSION_PROMPT = """
# WordPress to GitHub Pages Jekyll Blog Migration Prompt
## Purpose
Convert the provided WordPress post content into a Jekyll-compatible Markdown format suitable for GitHub Pages, preserving content integrity and adapting to Jekyll formatting standards.
## Output Format
CRITICAL: Your response must ONLY contain the Jekyll post content itself, starting with the front matter (---). DO NOT include ANY wrapper text, explanations, markdown code block fences (```), filenames, or notes about the conversion process.
The output should start with the front matter (---) and continue directly with the post content. Nothing else.
## Instructions
Convert the provided WordPress post into a Jekyll-compatible Markdown format following these specific guidelines:
### Front Matter
1. Create Jekyll front matter with the following elements:
- `layout: post`
- `title: "[Original Post Title]"` (preserve exact title in quotes)
- `date: [Original Publication Date in YYYY-MM-DD format]`
- `categories: [Appropriate categories from existing Jekyll blog]`
- `tags: [Appropriate tags, using hyphenated format for multi-word tags]`
- `author: "[Original author name]"` (use dc:creator and wp:author to extract from the source)
- `excerpt: "[Short description of the post content]"` (Create a concise, informative excerpt)
2. For categories and tags:
- Use lowercase, hyphenated format (e.g., `azure-monitor` not `AzureMonitor`)
- Prioritize categories already in use on the Jekyll blog, if available
- Limit categories to 2-3 primary topics
- Include original WordPress tags as Jekyll tags where relevant
### Content Formatting
1. Convert WordPress HTML formatting to Markdown:
- Replace `<a href="URL">text</a>` with `[text](URL)`
- Replace HTML lists with Markdown lists
- Replace HTML formatting tags with Markdown equivalents
- Remove WordPress-specific formatting elements
2. Code and Gist Handling:
- When code is presented in the original WordPress post as a GitHub gist:
- Preserve the gist embed exactly as is, using the script tag format:
```html
<script src="https://gist.github.com/[username]/[gist-id].js"></script>
```
- Do NOT convert gists to markdown code blocks
- For regular code blocks not in gists:
- Use triple backticks with appropriate language specification
- For KQL queries, use `kusto` as the language indicator
- For PowerShell code, use `powershell` as the language indicator
- When formatting PowerShell code:
- Preserve case sensitivity for cmdlets (e.g., `Get-Process`, not `get-process`)
- Maintain proper PowerShell styling conventions (CamelCase for variables, PascalCase for functions)
- Ensure pipe characters `|` and operators are surrounded by spaces
- Ensure no blank rows contain spaces or tabs
3. Media:
- For images in the original WordPress post:
- Retain the image if it is hosted on the same domain as the Jekyll blog
- Otherwise, remove the original image tag or link and replace with a descriptive note in italics:
```
*[Image removed during migration: Brief description of what the image showed]*
```
- Make the description informative enough that readers understand what visual content was there
- For embedded content that should remain externally hosted (like videos, GitHub gists, etc.):
- Keep the original embed code if compatible with GitHub Pages
- Otherwise, replace with a link to the content
4. Spelling and Language:
- Maintain the original tone and voice
- Migrate content without amending the original meaning or wording
- Ensure Australian English spelling is used throughout
### Special Considerations
1. Remove WordPress-specific HTML comments or metadata
2. Remove social sharing buttons or WordPress-specific elements
3. Preserve internal post structure and headings
4. If any internal links point to other WordPress posts, consider updating them to their Jekyll equivalents
5. For any links to Microsoft documentation, keep them as-is as they're unlikely to have changed
6. Add a note at the top of the post for older content indicating it's outdated using this format:
```
> **Note:** This article was originally written in [Month Year] and is now over [X] years old. Due to changes in [relevant technology] over time, the described solution may no longer work as written. Please consider this guide as a conceptual reference rather than a current implementation guide.
```
CRITICAL REMINDER: Your output must contain ONLY the Jekyll markdown content. NO explanations, NO code fences, NO "here's the converted post" text.
"""
# Prompt template for the quality-check step. It is filled in with str.format()
# and has exactly two placeholders: {jekyll_post} and {original_year}.
QUALITY_CHECK_PROMPT = """
Review and improve this Jekyll post:
{jekyll_post}
Make the following improvements and return the complete improved post:
1. Check for broken URLs and update them if possible, or flag them if there's no update available
2. Note any potential security issues at the end of the post
3. Flag any outdated tools or techniques mentioned in the post (this post was written in {original_year})
4. Ensure Australian English spelling is used throughout
5. Keep the original content and meaning intact but improve clarity if needed
6. Return the entire corrected Jekyll post with all front matter
CRITICAL: Your output must contain ONLY the Jekyll markdown content. NO explanations, NO code fences, NO "here's the corrected post" text, and NO filename suggestions. Your response must begin with "---" for the front matter and contain nothing else except the Jekyll post content.
"""
class WpToJekyllConverter:
    """WordPress to Jekyll converter using Claude CLI.

    Reads published posts from a WordPress export XML file, lets the user pick
    which ones to migrate, asks the ``claude`` command-line tool to rewrite
    each one as a Jekyll post (optionally followed by a quality-check pass),
    and writes the results into ``output_dir``. When ``dry_run`` is set, Claude
    is never invoked and no files or directories are created.
    """

    # strptime format of the <pubDate> element in the export.
    _PUB_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %z"
    # Format in which each parsed post's "date" value is stored and re-read.
    _POST_DATE_FORMAT = "%Y-%m-%d %H:%M:%S %z"

    def __init__(
        self,
        input_file: str,
        output_dir: str,
        prompt_file: Optional[str] = None,
        rate_limit: int = 5,
        limit: Optional[int] = None,
        target_categories: Optional[Set[str]] = None,
        target_tags: Optional[Set[str]] = None,
        timeout: int = 300,
        skip_quality_checks: bool = False,
        dry_run: bool = False,
    ):
        """
        Initialize the converter

        Args:
            input_file: Path to WordPress export XML file
            output_dir: Directory to output Jekyll posts
            prompt_file: Path to custom migration prompt file
            rate_limit: Seconds to wait between Claude API calls
            limit: Maximum number of posts to process
            target_categories: Set of categories to filter posts by
            target_tags: Set of tags to filter posts by
            timeout: Timeout in seconds for Claude API calls
            skip_quality_checks: Skip quality check step if True
            dry_run: Simulate conversion without calling Claude or saving files
        """
        self.input_file = input_file
        self.output_dir = Path(output_dir)
        self.prompt_file = prompt_file
        self.rate_limit = rate_limit
        self.limit = limit
        self.target_categories = target_categories
        self.target_tags = target_tags
        self.timeout = timeout
        self.skip_quality_checks = skip_quality_checks
        self.dry_run = dry_run
        self.conversion_prompt = DEFAULT_CONVERSION_PROMPT

        # Create output directory if it doesn't exist (only if not in dry run)
        if not self.dry_run:
            self.output_dir.mkdir(parents=True, exist_ok=True)

        # Load custom prompt if specified
        if self.prompt_file:
            self.load_custom_prompt()

    def load_custom_prompt(self):
        """Load custom migration prompt from file.

        On success ``self.conversion_prompt`` is replaced with the file's
        content. If the file is missing, unreadable, or contains only
        whitespace, a message is written to stderr and the current prompt is
        left in place.
        """
        try:
            with open(self.prompt_file, "r", encoding="utf-8") as f:
                custom_prompt = f.read()
        except FileNotFoundError:
            print(
                f"Warning: Custom prompt file '{self.prompt_file}' not found. Using default prompt.",
                file=sys.stderr,
            )
            return
        except (OSError, UnicodeError) as e:
            print(f"Error loading custom prompt: {e}", file=sys.stderr)
            print("Using default prompt instead.", file=sys.stderr)
            return

        # A blank file would otherwise send Claude a prompt with no instructions.
        if not custom_prompt.strip():
            print(
                f"Warning: Custom prompt file '{self.prompt_file}' is empty. Using default prompt.",
                file=sys.stderr,
            )
            return

        if not self.dry_run:
            print(f"Loaded custom migration prompt from {self.prompt_file}")
        self.conversion_prompt = custom_prompt

    def parse_wordpress_export(self) -> List[Dict]:
        """
        Parse WordPress export XML and extract posts

        Only items whose ``wp:post_type`` is ``post`` and ``wp:status`` is
        ``publish`` are returned. Category/tag filters and ``limit`` are
        applied here. A post that cannot be read (for example an unparseable
        ``pubDate``) is skipped with a warning instead of aborting the run.

        Returns:
            List of dictionaries containing post data

        Raises:
            SystemExit: If the export file cannot be read or parsed as XML.
        """
        try:
            root = ET.parse(self.input_file).getroot()

            # Lower-case the filters once instead of once per comparison.
            wanted_categories = {c.lower() for c in self.target_categories or ()}
            wanted_tags = {t.lower() for t in self.target_tags or ()}

            posts = []
            for item in root.findall(".//item"):
                post_data = self._extract_post(item)
                if post_data is None:
                    continue

                # Check if we need to filter by categories or tags
                if wanted_categories and not any(
                    cat.lower() in wanted_categories for cat in post_data["categories"]
                ):
                    continue
                if wanted_tags and not any(
                    tag.lower() in wanted_tags for tag in post_data["tags"]
                ):
                    continue

                posts.append(post_data)

            if not self.dry_run:
                print(f"Found {len(posts)} posts to convert")

            # Apply limit if specified
            if self.limit is not None and len(posts) > self.limit:
                posts = posts[:self.limit]
                if not self.dry_run:
                    print(f"Limiting to {self.limit} posts as requested")

            return posts
        except ET.ParseError as e:
            print(f"Error parsing XML file: {e}", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            # Script-level boundary: report cleanly rather than with a traceback.
            print(f"Error processing WordPress export: {e}", file=sys.stderr)
            sys.exit(1)

    def _extract_post(self, item) -> Optional[Dict]:
        """Build the post dictionary for one ``<item>`` element.

        Args:
            item: An ``<item>`` element from the export.

        Returns:
            The post data dictionary, or None when the item is not a published
            post, lacks a required element, or has an unparseable ``pubDate``.
        """
        post_type = item.findtext("./wp:post_type", namespaces=NAMESPACES)
        status = item.findtext("./wp:status", namespaces=NAMESPACES)
        # Only process published posts
        if post_type != "post" or status != "publish":
            return None

        # findtext() gives None for a missing element and "" for an empty one,
        # so an empty title or body no longer ends up as None downstream.
        required_paths = {
            "title": "./title",
            "link": "./link",
            "pub_date": "./pubDate",
            "content": "./content:encoded",
            "wp_id": "./wp:post_id",
            "slug": "./wp:post_name",
        }
        fields = {
            key: item.findtext(path, namespaces=NAMESPACES)
            for key, path in required_paths.items()
        }
        # Skip if required elements are missing
        if any(value is None for value in fields.values()):
            return None

        try:
            date_obj = datetime.strptime(
                fields["pub_date"].strip(), self._PUB_DATE_FORMAT
            )
        except ValueError:
            print(
                f"Warning: Skipping post '{fields['title']}' (ID: {fields['wp_id']}): "
                f"could not parse publication date '{fields['pub_date']}'",
                file=sys.stderr,
            )
            return None

        # Extract categories and tags; both are <category> elements that differ
        # only in their "domain" attribute.
        categories = []
        tags = []
        for category in item.findall("./category"):
            if not category.text:
                continue  # an empty element carries no usable name
            domain = category.get("domain")
            if domain == "category":
                categories.append(category.text)
            elif domain == "post_tag":
                tags.append(category.text)

        return {
            "title": fields["title"],
            "link": fields["link"],
            "date": date_obj.strftime(self._POST_DATE_FORMAT),
            "year": date_obj.year,
            "content": fields["content"],
            "categories": categories,
            "tags": tags,
            "wp_id": fields["wp_id"],
            "slug": fields["slug"],
        }

    def display_posts_for_selection(self, posts: List[Dict]) -> List[Dict]:
        """
        Display posts and allow user to select which ones to migrate

        In dry-run mode no input is requested and every post is selected.
        Otherwise the user is prompted until a valid answer is given. If
        standard input is closed or unreadable, the selection is cancelled and
        an empty list is returned instead of prompting forever.

        Args:
            posts: List of post dictionaries

        Returns:
            List of selected post dictionaries

        Raises:
            SystemExit: If the user answers 'quit'.
        """
        if not posts:
            print("No posts found matching the filter criteria.")
            return []

        print("\n===== Available Posts =====")
        print(f"Found {len(posts)} posts matching your criteria.\n")

        # Display posts with numbers for selection
        for i, post in enumerate(posts, 1):
            date_obj = datetime.strptime(post["date"], self._POST_DATE_FORMAT)
            formatted_date = date_obj.strftime("%Y-%m-%d")
            categories_str = ", ".join(post["categories"])
            tags_str = ", ".join(post["tags"])
            print(f"{i}. [{formatted_date}] {post['title']}")
            print(f" Categories: {categories_str}")
            print(f" Tags: {tags_str}")
            print(f" ID: {post['wp_id']}")
            print()

        if self.dry_run:
            print("[DRY RUN] Would ask for user selection. Selecting all posts for simulation.")
            return posts

        # Get user selection
        while True:
            try:
                selection_input = input("\nEnter the numbers of posts to migrate (comma-separated), 'all' for all posts, or 'quit' to exit: ")
            except (EOFError, OSError):
                # No usable stdin: re-prompting could never succeed.
                print("\nNo input available. Migration cancelled.")
                return []

            choice = selection_input.strip().lower()
            if choice == 'quit':
                print("Migration cancelled.")
                sys.exit(0)
            if choice == 'all':
                return posts

            # Parse the selection indices, tolerating stray commas ("1, 2,")
            try:
                selection_indices = [
                    int(token)
                    for token in (part.strip() for part in choice.split(','))
                    if token
                ]
            except ValueError:
                print("Invalid input. Please enter comma-separated numbers, 'all', or 'quit'.")
                continue

            # Validate the indices
            if any(idx < 1 or idx > len(posts) for idx in selection_indices):
                print(f"Invalid selection. Please enter numbers between 1 and {len(posts)}.")
                continue

            # De-duplicate (keeping first-seen order) so that "1,1" does not
            # convert the same post twice.
            unique_indices = list(dict.fromkeys(selection_indices))
            if not unique_indices:
                print("No posts selected. Please try again.")
                continue

            selected_posts = [posts[idx - 1] for idx in unique_indices]
            print(f"\nSelected {len(selected_posts)} posts for migration.")
            return selected_posts

    def run_claude(
        self, prompt: str, output_format: str = "json"
    ) -> Tuple[bool, str, Optional[Dict]]:
        """
        Simulate or run Claude CLI with the given prompt

        Args:
            prompt: Prompt to send to Claude
            output_format: Output format (json or text)

        Returns:
            Tuple of (success, message, response_data). For the "json" format
            response_data is the decoded JSON object; for any other format it
            is the raw stdout text. It is None on failure.
        """
        if self.dry_run:
            # Simulate successful Claude API call in dry run
            return True, "Simulated Claude call (dry run)", {
                "result": f"Simulated Jekyll conversion for prompt: {prompt[:100]}..."
            }

        try:
            # Run Claude CLI with the prompt (argument list, no shell involved)
            result = subprocess.run(
                ["claude", "-p", prompt, "--output-format", output_format],
                capture_output=True,
                text=True,
                check=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return False, f"Claude CLI timed out after {self.timeout} seconds", None
        except subprocess.CalledProcessError as e:
            return False, f"Claude CLI error (exit code {e.returncode}): {e.stderr}", None
        except Exception as e:
            return False, f"Unexpected error: {e}", None

        if output_format != "json":
            return True, "Success", result.stdout

        try:
            response_data = json.loads(result.stdout)
        except json.JSONDecodeError as e:
            return False, f"Error parsing Claude response as JSON: {e}", None

        # Callers read response_data.get("result"), so anything other than a
        # JSON object is unusable.
        if not isinstance(response_data, dict):
            return False, "Claude response was not a JSON object", None
        # NOTE(review): relies on the CLI's JSON output carrying an "is_error"
        # flag; when the key is absent this check is a no-op.
        if response_data.get("is_error"):
            return False, f"Claude reported an error: {response_data.get('result', '')}", None
        return True, "Success", response_data

    def convert_post(self, post: Dict) -> Tuple[bool, str, Optional[str]]:
        """
        Convert a WordPress post to Jekyll format using Claude (or simulate)

        Args:
            post: Dictionary containing post data

        Returns:
            Tuple of (success, message, jekyll_post)
        """
        # Build prompt for Claude
        prompt = "".join(
            [
                self.conversion_prompt,
                "\n\nHere's the WordPress post:\n\n",
                f"Title: {post['title']}\n",
                f"Date: {post['date']}\n",
                f"Categories: {', '.join(post['categories'])}\n",
                f"Tags: {', '.join(post['tags'])}\n",
                f"Content:\n{post['content']}\n",
                "\nREMINDER: Return ONLY the Jekyll markdown content starting with the front matter. NO explanations, code fences, or other text.",
            ]
        )

        # Call Claude to convert the post
        success, message, response = self.run_claude(prompt)
        if not success:
            return False, message, None
        if self.dry_run:
            return True, "Simulated post conversion", response.get("result", "")

        jekyll_post = response.get("result", "")
        if not jekyll_post:
            return False, "Claude returned an empty conversion result", None

        # Strip any markdown code fences or explanatory text if present
        jekyll_post = self._clean_claude_response(jekyll_post)
        if not jekyll_post:
            return False, "Claude returned an empty conversion result", None
        return True, "Successfully converted post", jekyll_post

    def _clean_claude_response(self, response: str) -> str:
        """
        Clean Claude's response to remove any markdown fences or explanatory text

        Args:
            response: Claude's response string

        Returns:
            Cleaned Jekyll post content
        """
        # Trim first so trailing blank lines cannot hide a closing fence.
        cleaned = response.strip()

        # Remove any explanations before the post content. Prefer a line that
        # is exactly "---" (the front matter delimiter); fall back to the first
        # "---" anywhere in the text.
        delimiter = re.search(r"^---\s*$", cleaned, flags=re.MULTILINE)
        front_matter_start = delimiter.start() if delimiter else cleaned.find("---")
        if front_matter_start > 0:
            cleaned = cleaned[front_matter_start:]

        # Remove markdown code fences if present
        cleaned = re.sub(r"^```\w*[ \t]*\n", "", cleaned)
        cleaned = re.sub(r"\n```[ \t]*$", "", cleaned)

        # Remove any explanations after the post content
        end_markers = [
            "\nHere's the Jekyll post",
            "\nI've converted",
            "\nThis Jekyll post",
            "\nThe migrated Jekyll",
        ]
        for marker in end_markers:
            marker_pos = cleaned.find(marker)
            if marker_pos != -1:
                cleaned = cleaned[:marker_pos]

        return cleaned.strip()

    def quality_check(
        self, jekyll_post: str, original_year: int
    ) -> Tuple[bool, str, Optional[str]]:
        """
        Run quality checks on Jekyll post using Claude (or simulate)

        Args:
            jekyll_post: Jekyll post content
            original_year: Year the post was originally written

        Returns:
            Tuple of (success, message, improved_jekyll_post)
        """
        # If skipping quality checks
        if self.skip_quality_checks:
            if not self.dry_run:
                print("Quality checks skipped")
            return True, "Quality checks skipped", jekyll_post

        # Build prompt for Claude
        prompt = QUALITY_CHECK_PROMPT.format(
            jekyll_post=jekyll_post, original_year=original_year
        )
        prompt += "\n\nREMINDER: Return ONLY the improved Jekyll markdown content starting with the front matter. NO explanations, code fences, or other text."

        # Call Claude to check and improve the post
        success, message, response = self.run_claude(prompt)
        if not success:
            return False, message, None
        if self.dry_run:
            return True, "Simulated quality check", response.get("result", "")

        improved_jekyll_post = response.get("result", "")
        if not improved_jekyll_post:
            return False, "Claude returned an empty quality check result", None

        # Clean the response similarly to the conversion step
        improved_jekyll_post = self._clean_claude_response(improved_jekyll_post)
        if not improved_jekyll_post:
            return False, "Claude returned an empty quality check result", None
        return True, "Successfully improved post", improved_jekyll_post

    def save_jekyll_post(self, post: Dict, jekyll_content: str) -> bool:
        """
        Save Jekyll post to file (or simulate saving)

        The file is named ``YYYY-MM-DD-<slug>.md``. The slug is the post's own
        slug when it has one, otherwise one derived from the title, otherwise
        ``post-<wp_id>``. An existing file with the same name is overwritten.

        Args:
            post: Original post data dictionary
            jekyll_content: Jekyll formatted post content

        Returns:
            Boolean indicating success
        """
        try:
            # Extract date for filename (YYYY-MM-DD format)
            date_obj = datetime.strptime(post["date"], self._POST_DATE_FORMAT)
            date_prefix = date_obj.strftime("%Y-%m-%d")

            # Create filename
            slug = (post.get("slug") or "").strip()
            if not slug:
                # Collapse each run of unsafe characters into a single dash and
                # trim the ends, so "Hello, World!" becomes "hello-world".
                title = post.get("title") or ""
                slug = re.sub(r"[^a-z0-9_-]+", "-", title.lower()).strip("-")
            if not slug:
                slug = f"post-{post.get('wp_id', 'untitled')}"
            filename = f"{date_prefix}-{slug}.md"
            file_path = self.output_dir / filename

            # If in dry run mode, just simulate saving
            if self.dry_run:
                print(f"[DRY RUN] Would save post to: {file_path}")
                print(f"[DRY RUN] Post title: {post['title']}")
                print(f"[DRY RUN] Content preview: {jekyll_content[:200]}...")
                return True

            # Actual file saving; end the file with a newline, as text files should
            if not jekyll_content.endswith("\n"):
                jekyll_content += "\n"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(jekyll_content)
            return True
        except Exception as e:
            print(f"Error saving file: {e}", file=sys.stderr)
            return False

    def process_posts(self):
        """Process all WordPress posts and convert them to Jekyll.

        Parses the export, asks the user which posts to migrate, then converts,
        optionally quality-checks, and saves each selected post. A failure on
        one post is reported and the run continues with the next one.
        """
        # Parse all posts
        all_posts = self.parse_wordpress_export()
        if not all_posts:
            print("No posts found matching the filter criteria.")
            return

        # Display posts and get user selection
        selected_posts = self.display_posts_for_selection(all_posts)
        if not selected_posts:
            print("No posts selected for migration.")
            return

        if self.dry_run:
            print("\n[DRY RUN MODE] Simulating WordPress to Jekyll migration")
            print(f"Would process {len(selected_posts)} selected posts")
            print(f"Input file: {self.input_file}")
            print(f"Would save to: {self.output_dir}")
            if self.prompt_file:
                print(f"Would use custom prompt from: {self.prompt_file}")
            if self.target_categories:
                print(f"Filtered for categories: {', '.join(self.target_categories)}")
            if self.target_tags:
                print(f"Filtered for tags: {', '.join(self.target_tags)}")
            print("Would simulate process for each post without actual conversion...")

        total = len(selected_posts)
        for i, post in enumerate(selected_posts, 1):
            # Rate limiting between API calls. Waiting at the top of the loop
            # means a failed conversion (which skips the rest of the body)
            # still delays the next call; a dry run makes no calls, so it
            # never waits.
            if i > 1 and not self.dry_run and self.rate_limit > 0:
                print(f"Waiting {self.rate_limit} seconds before next post...")
                time.sleep(self.rate_limit)

            post_id = post.get("wp_id", i)
            print(
                f"\nProcessing post {i}/{total} (ID: {post_id}): {post['title']}"
            )

            # Step 1: Convert post to Jekyll format
            print("Converting to Jekyll format...")
            convert_success, convert_message, jekyll_post = self.convert_post(post)
            if not convert_success:
                print(f"ERROR: {convert_message}")
                continue
            print("Conversion successful")
            improved_jekyll_post = jekyll_post

            # Step 2: Run quality checks and improvements (unless skipped)
            if not self.skip_quality_checks:
                print("Running quality checks...")
                check_success, check_message, quality_checked_post = self.quality_check(
                    jekyll_post, post["year"]
                )
                if not check_success:
                    # Use the original conversion if quality check fails
                    print(f"WARNING: Quality check failed - {check_message}")
                else:
                    print("Quality checks completed successfully")
                    improved_jekyll_post = quality_checked_post
            else:
                print("Quality checks skipped")

            # Step 3: Save the final post
            save_success = self.save_jekyll_post(post, improved_jekyll_post)
            if save_success:
                print("Successfully saved Jekyll post")
            else:
                print("ERROR: Failed to save Jekyll post")

        if not self.dry_run:
            print("\nMigration complete!")
        else:
            print("\nDry run complete. No actual conversions or files were created.")
def _positive_int(value: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def _non_negative_int(value: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 0."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must not be negative, got {number}")
    return number


def _split_csv(value: Optional[str]) -> Optional[Set[str]]:
    """Split a comma-separated option into a set of non-empty, stripped names.

    Returns None when the option was not given or contains no names.
    """
    if not value:
        return None
    names = {part.strip() for part in value.split(",") if part.strip()}
    return names or None


def main():
    """Main entry point for the script.

    Parses the command line, validates the input file, the optional prompt
    file and (unless --dry-run is given) the availability of the ``claude``
    CLI, then builds a WpToJekyllConverter and runs the migration.

    Raises:
        SystemExit: On invalid arguments (status 2), or when the input file is
            missing or the Claude CLI is unavailable (status 1).
    """
    parser = argparse.ArgumentParser(
        description="Convert WordPress export to Jekyll posts using Claude"
    )
    parser.add_argument(
        "--input", "-i", required=True, help="Path to WordPress export XML file"
    )
    parser.add_argument(
        "--output", "-o", default="_posts", help="Directory to output Jekyll posts"
    )
    parser.add_argument(
        "--prompt-file", "-p", help="Path to custom migration prompt file"
    )
    parser.add_argument(
        "--rate-limit",
        "-r",
        type=_non_negative_int,
        default=5,
        help="Seconds to wait between Claude API calls",
    )
    parser.add_argument(
        "--limit",
        "-l",
        type=_positive_int,
        help="Maximum number of posts to display for selection",
    )
    parser.add_argument(
        "--categories",
        "-c",
        help="Comma-separated list of categories to filter posts by",
    )
    parser.add_argument(
        "--tags",
        "-t",
        help="Comma-separated list of tags to filter posts by",
    )
    parser.add_argument(
        "--timeout",
        type=_positive_int,
        default=300,
        help="Timeout in seconds for Claude API calls (default: 300)",
    )
    parser.add_argument(
        "--skip-quality-checks",
        action="store_true",
        help="Skip the quality check step",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Simulate the migration process without calling Claude or saving files",
    )
    args = parser.parse_args()

    # Check if input file exists
    if not os.path.isfile(args.input):
        print(f"ERROR: Input file '{args.input}' does not exist", file=sys.stderr)
        sys.exit(1)

    # Check if prompt file exists if specified
    prompt_file = args.prompt_file
    if prompt_file and not os.path.isfile(prompt_file):
        print(
            f"WARNING: Prompt file '{prompt_file}' does not exist", file=sys.stderr
        )
        print("Will use default prompt instead", file=sys.stderr)
        # Drop it so the converter does not warn a second time and the summary
        # below does not claim a custom prompt is in use.
        prompt_file = None

    # Process categories and tags if provided
    target_categories = _split_csv(args.categories)
    target_tags = _split_csv(args.tags)

    # Skip Claude CLI check if in dry run mode
    if not args.dry_run:
        # Check if Claude CLI is available
        try:
            subprocess.run(
                ["claude", "--version"], capture_output=True, check=True, timeout=30
            )
        except subprocess.CalledProcessError:
            print(
                "ERROR: Claude CLI returned an error. Make sure it's properly installed.",
                file=sys.stderr,
            )
            sys.exit(1)
        except FileNotFoundError:
            print(
                "ERROR: Claude CLI not found. Please install Claude Code CLI.",
                file=sys.stderr,
            )
            sys.exit(1)
        except subprocess.TimeoutExpired:
            print(
                "ERROR: Claude CLI did not respond to '--version' in time.",
                file=sys.stderr,
            )
            sys.exit(1)

    # Create and run the converter
    converter = WpToJekyllConverter(
        input_file=args.input,
        output_dir=args.output,
        prompt_file=prompt_file,
        rate_limit=args.rate_limit,
        limit=args.limit,
        target_categories=target_categories,
        target_tags=target_tags,
        timeout=args.timeout,
        skip_quality_checks=args.skip_quality_checks,
        dry_run=args.dry_run,
    )

    if not args.dry_run:
        print("Starting WordPress to Jekyll migration process")
        print(f"Input file: {args.input}")
        print(f"Output directory: {args.output}")
        if prompt_file:
            print(f"Using custom prompt from: {prompt_file}")
        if args.limit is not None:
            print(f"Limiting to {args.limit} posts for display")
        if target_categories:
            print(f"Filtering for categories: {', '.join(target_categories)}")
        if target_tags:
            print(f"Filtering for tags: {', '.join(target_tags)}")
        print(f"Claude API timeout: {args.timeout} seconds")
        if args.skip_quality_checks:
            print("Quality checks will be skipped")

    converter.process_posts()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment