dstreefkerk · May 20, 2025 10:15
diff --git a/wp_to_jekyll.py b/wp_to_jekyll.py
 #!/usr/bin/env python3
 """
 WordPress to Jekyll Migration Script using Claude Code CLI

 This script:
 1. Takes a WordPress export XML file path as input
 2. Lists all matching posts based on runtime parameters
 3. Allows the user to select which posts to migrate
 4. Extracts posts from the WordPress export
 5. Uses Claude CLI to convert each post to Jekyll format (or simulates conversion with --dry-run)
 6. Runs a quality check on each converted post using Claude CLI (or simulates with --dry-run)
 7. Saves the final Jekyll posts in the specified output directory (or simulates saving with --dry-run)

 Usage:
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --dry-run
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --prompt-file jekyll-migration-prompt.md
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --limit 5
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --categories technology,programming --tags windows,scripting
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --timeout 600
    python3 wp_to_jekyll.py --input wordpress_export.xml --output _posts --skip-quality-checks
 """

 import argparse
 import json
 import os
 import re
 import subprocess
 import sys
 import time
 import xml.etree.ElementTree as ET
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Set

 # XML namespace prefix -> URI map for WordPress export files.
 # It is passed as the namespace argument to ElementTree find() calls so that
 # prefixed paths such as "./wp:post_type" or "./content:encoded" resolve.
 NAMESPACES = {
    # WordPress-specific fields (wp:post_type, wp:status, wp:post_id, wp:post_name)
    "wp": "http://wordpress.org/export/1.2/",
    # Full post body (content:encoded)
    "content": "http://purl.org/rss/1.0/modules/content/",
    # Declared for completeness; no lookup in this file uses the prefix
    "excerpt": "http://wordpress.org/export/1.2/excerpt/",
    # Declared for completeness; no lookup in this file uses the prefix
    "dc": "http://purl.org/dc/elements/1.1/",
 }

 # Default conversion prompt sent to Claude for every post.
 # The converter uses it unless a non-empty custom prompt file is loaded;
 # convert_post() appends the post's title, date, categories, tags and
 # content after this text. The wording repeatedly insists on "content only"
 # output because any wrapper text or code fences would end up in the saved
 # Jekyll file.
 DEFAULT_CONVERSION_PROMPT = """
 # WordPress to GitHub Pages Jekyll Blog Migration Prompt

 ## Purpose
 Convert the provided WordPress post content into a Jekyll-compatible Markdown format suitable for GitHub Pages, preserving content integrity and adapting to Jekyll formatting standards.

 ## Output Format
 CRITICAL: Your response must ONLY contain the Jekyll post content itself, starting with the front matter (---). DO NOT include ANY wrapper text, explanations, markdown code block fences (```), filenames, or notes about the conversion process. 

 The output should start with the front matter (---) and continue directly with the post content. Nothing else.

 ## Instructions
 Convert the provided WordPress post into a Jekyll-compatible Markdown format following these specific guidelines:

 ### Front Matter
 1. Create Jekyll front matter with the following elements:
   - `layout: post`
   - `title: "[Original Post Title]"` (preserve exact title in quotes)
   - `date: [Original Publication Date in YYYY-MM-DD format]`
   - `categories: [Appropriate categories from existing Jekyll blog]`
   - `tags: [Appropriate tags, using hyphenated format for multi-word tags]`
   - `author: "[Original author name]"` (use dc:creator and wp:author to extract from the source)
   - `excerpt: "[Short description of the post content]"` (Create a concise, informative excerpt)

 2. For categories and tags:
   - Use lowercase, hyphenated format (e.g., `azure-monitor` not `AzureMonitor`)
   - Prioritize categories already in use on the Jekyll blog, if available
   - Limit categories to 2-3 primary topics
   - Include original WordPress tags as Jekyll tags where relevant

 ### Content Formatting
 1. Convert WordPress HTML formatting to Markdown:
   - Replace `<a href="URL">text</a>` with `[text](URL)`
   - Replace HTML lists with Markdown lists
   - Replace HTML formatting tags with Markdown equivalents
   - Remove WordPress-specific formatting elements

 2. Code and Gist Handling:
   - When code is presented in the original WordPress post as a GitHub gist:
     - Preserve the gist embed exactly as is, using the script tag format:
       ```html
       <script src="https://gist.github.com/[username]/[gist-id].js"></script>
       ```
     - Do NOT convert gists to markdown code blocks
   - For regular code blocks not in gists:
     - Use triple backticks with appropriate language specification
     - For KQL queries, use `kusto` as the language indicator
     - For PowerShell code, use `powershell` as the language indicator
     - When formatting PowerShell code:
       - Preserve case sensitivity for cmdlets (e.g., `Get-Process`, not `get-process`)
       - Maintain proper PowerShell styling conventions (CamelCase for variables, PascalCase for functions)
       - Ensure pipe characters `|` and operators are surrounded by spaces
     - Ensure no blank rows contain spaces or tabs

 3. Media:
   - For images in the original WordPress post:
     - Retain the image if it is hosted on the same domain as the Jekyll blog
     - Otherwise, remove the original image tag or link and replace with a descriptive note in italics:
       ```
       *[Image removed during migration: Brief description of what the image showed]*
       ```
     - Make the description informative enough that readers understand what visual content was there
   - For embedded content that should remain externally hosted (like videos, GitHub gists, etc.):
     - Keep the original embed code if compatible with GitHub Pages
     - Otherwise, replace with a link to the content

 4. Spelling and Language:
   - Maintain the original tone and voice
   - Migrate content without amending the original meaning or wording
   - Ensure Australian English spelling is used throughout

 ### Special Considerations
 1. Remove WordPress-specific HTML comments or metadata
 2. Remove social sharing buttons or WordPress-specific elements
 3. Preserve internal post structure and headings
 4. If any internal links point to other WordPress posts, consider updating them to their Jekyll equivalents
 5. For any links to Microsoft documentation, keep them as-is as they're unlikely to have changed
 6. Add a note at the top of the post for older content indicating it's outdated using this format:
   ```
   > **Note:** This article was originally written in [Month Year] and is now over [X] years old. Due to changes in [relevant technology] over time, the described solution may no longer work as written. Please consider this guide as a conceptual reference rather than a current implementation guide.
   ```

 CRITICAL REMINDER: Your output must contain ONLY the Jekyll markdown content. NO explanations, NO code fences, NO "here's the converted post" text.
 """

 # Template for the second (review) pass over an already-converted post.
 # quality_check() fills it with str.format(), so the only brace placeholders
 # it may contain are {jekyll_post} (the converted post text) and
 # {original_year} (the year the post was published); any other literal
 # braces added here would have to be doubled.
 QUALITY_CHECK_PROMPT = """
 Review and improve this Jekyll post:

 {jekyll_post}

 Make the following improvements and return the complete improved post:

 1. Check for broken URLs and update them if possible, or flag them if there's no update available
 2. Note any potential security issues at the end of the post
 3. Flag any outdated tools or techniques mentioned in the post (this post was written in {original_year})
 4. Ensure Australian English spelling is used throughout
 5. Keep the original content and meaning intact but improve clarity if needed
 6. Return the entire corrected Jekyll post with all front matter

 CRITICAL: Your output must contain ONLY the Jekyll markdown content. NO explanations, NO code fences, NO "here's the corrected post" text, and NO filename suggestions. Your response must begin with "---" for the front matter and contain nothing else except the Jekyll post content.
 """


 class WpToJekyllConverter:
    """WordPress to Jekyll converter using Claude CLI"""

    def __init__(
        self,
        input_file: str,
        output_dir: str,
        prompt_file: Optional[str] = None,
        rate_limit: int = 5,
        limit: Optional[int] = None,
        target_categories: Optional[Set[str]] = None,
        target_tags: Optional[Set[str]] = None,
        timeout: int = 300,
        skip_quality_checks: bool = False,
        dry_run: bool = False,
    ):
        """
        Initialize the converter

        Args:
            input_file: Path to WordPress export XML file
            output_dir: Directory to output Jekyll posts
            prompt_file: Path to custom migration prompt file
            rate_limit: Seconds to wait between Claude API calls
            limit: Maximum number of posts to process
            target_categories: Set of categories to filter posts by
            target_tags: Set of tags to filter posts by
            timeout: Timeout in seconds for Claude API calls
            skip_quality_checks: Skip quality check step if True
            dry_run: Simulate conversion without calling Claude or saving files
        """
        self.input_file = input_file
        self.output_dir = Path(output_dir)
        self.prompt_file = prompt_file
        self.rate_limit = rate_limit
        self.limit = limit
        self.target_categories = target_categories
        self.target_tags = target_tags
        self.timeout = timeout
        self.skip_quality_checks = skip_quality_checks
        self.dry_run = dry_run
        self.conversion_prompt = DEFAULT_CONVERSION_PROMPT

        # Create output directory if it doesn't exist (only if not in dry run)
        if not self.dry_run:
            self.output_dir.mkdir(parents=True, exist_ok=True)

        # Load custom prompt if specified
        if self.prompt_file:
            self.load_custom_prompt()

    def load_custom_prompt(self):
        """Load custom migration prompt from file"""
        try:
            with open(self.prompt_file, "r", encoding="utf-8") as f:
                custom_prompt = f.read()

            if custom_prompt:
                if not self.dry_run:
                    print(f"Loaded custom migration prompt from {self.prompt_file}")
                self.conversion_prompt = custom_prompt
        except FileNotFoundError:
            print(
                f"Warning: Custom prompt file '{self.prompt_file}' not found. Using default prompt.",
                file=sys.stderr,
            )
        except Exception as e:
            print(f"Error loading custom prompt: {e}", file=sys.stderr)
            print("Using default prompt instead.", file=sys.stderr)

    def parse_wordpress_export(self) -> List[Dict]:
        """
        Parse WordPress export XML and extract posts

        Returns:
            List of dictionaries containing post data
        """
        try:
            tree = ET.parse(self.input_file)
            root = tree.getroot()

            # Find all item elements that are posts
            posts = []
            for item in root.findall(".//item"):
                post_type_elem = item.find("./wp:post_type", NAMESPACES)
                status_elem = item.find("./wp:status", NAMESPACES)
                
                # Skip if required elements are missing
                if post_type_elem is None or status_elem is None:
                    continue
                
                # Only process published posts
                if post_type_elem.text == "post" and status_elem.text == "publish":
                    # Extract post data
                    title_elem = item.find("./title")
                    link_elem = item.find("./link")
                    pub_date_elem = item.find("./pubDate")
                    content_elem = item.find("./content:encoded", NAMESPACES)
                    wp_id_elem = item.find("./wp:post_id", NAMESPACES)
                    slug_elem = item.find("./wp:post_name", NAMESPACES)
                    
                    # Skip if required elements are missing
                    if (title_elem is None or link_elem is None or pub_date_elem is None or 
                        content_elem is None or wp_id_elem is None or slug_elem is None):
                        continue
                    
                    title = title_elem.text
                    link = link_elem.text
                    pub_date = pub_date_elem.text
                    content = content_elem.text
                    wp_id = wp_id_elem.text
                    slug = slug_elem.text

                    # Parse date
                    date_obj = datetime.strptime(pub_date, "%a, %d %b %Y %H:%M:%S %z")
                    formatted_date = date_obj.strftime("%Y-%m-%d %H:%M:%S %z")

                    # Extract categories and tags
                    categories = []
                    tags = []

                    for category in item.findall("./category"):
                        domain = category.get("domain")
                        if domain == "category":
                            categories.append(category.text)
                        elif domain == "post_tag":
                            tags.append(category.text)

                    # Check if we need to filter by categories or tags
                    if self.target_categories and not any(
                        cat.lower() in [c.lower() for c in self.target_categories]
                        for cat in categories
                    ):
                        continue

                    if self.target_tags and not any(
                        tag.lower() in [t.lower() for t in self.target_tags]
                        for tag in tags
                    ):
                        continue

                    # Create post data dictionary
                    post_data = {
                        "title": title,
                        "link": link,
                        "date": formatted_date,
                        "year": date_obj.year,
                        "content": content,
                        "categories": categories,
                        "tags": tags,
                        "wp_id": wp_id,
                        "slug": slug,
                    }

                    posts.append(post_data)

            if not self.dry_run:
                print(f"Found {len(posts)} posts to convert")
            
            # Apply limit if specified
            if self.limit is not None and len(posts) > self.limit:
                posts = posts[:self.limit]
                if not self.dry_run:
                    print(f"Limiting to {self.limit} posts as requested")
                
            return posts

        except ET.ParseError as e:
            print(f"Error parsing XML file: {e}", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            print(f"Error processing WordPress export: {e}", file=sys.stderr)
            sys.exit(1)

    def display_posts_for_selection(self, posts: List[Dict]) -> List[Dict]:
        """
        Display posts and allow user to select which ones to migrate

        Args:
            posts: List of post dictionaries

        Returns:
            List of selected post dictionaries
        """
        if not posts:
            print("No posts found matching the filter criteria.")
            return []
            
        print("\n===== Available Posts =====")
        print(f"Found {len(posts)} posts matching your criteria.\n")
        
        # Display posts with numbers for selection
        for i, post in enumerate(posts, 1):
            date_obj = datetime.strptime(post["date"], "%Y-%m-%d %H:%M:%S %z")
            formatted_date = date_obj.strftime("%Y-%m-%d")
            categories_str = ", ".join(post["categories"])
            tags_str = ", ".join(post["tags"])
            
            print(f"{i}. [{formatted_date}] {post['title']}")
            print(f"   Categories: {categories_str}")
            print(f"   Tags: {tags_str}")
            print(f"   ID: {post['wp_id']}")
            print()
            
        if self.dry_run:
            print("[DRY RUN] Would ask for user selection. Selecting all posts for simulation.")
            return posts
            
        # Get user selection
        while True:
            try:
                selection_input = input("\nEnter the numbers of posts to migrate (comma-separated), 'all' for all posts, or 'quit' to exit: ")
                
                if selection_input.lower() == 'quit':
                    print("Migration cancelled.")
                    sys.exit(0)
                    
                if selection_input.lower() == 'all':
                    return posts
                    
                # Parse the selection indices
                selection_indices = [int(idx.strip()) for idx in selection_input.split(',')]
                
                # Validate the indices
                if any(idx < 1 or idx > len(posts) for idx in selection_indices):
                    print(f"Invalid selection. Please enter numbers between 1 and {len(posts)}.")
                    continue
                    
                # Get the selected posts
                selected_posts = [posts[idx-1] for idx in selection_indices]
                
                if not selected_posts:
                    print("No posts selected. Please try again.")
                    continue
                    
                print(f"\nSelected {len(selected_posts)} posts for migration.")
                return selected_posts
                
            except ValueError:
                print("Invalid input. Please enter comma-separated numbers, 'all', or 'quit'.")
                continue
            except Exception as e:
                print(f"Error: {e}")
                print("Please try again.")
                continue

    def run_claude(
        self, prompt: str, output_format: str = "json"
    ) -> Tuple[bool, str, Optional[Dict]]:
        """
        Simulate or run Claude CLI with the given prompt

        Args:
            prompt: Prompt to send to Claude
            output_format: Output format (json or text)

        Returns:
            Tuple of (success, message, response_data)
        """
        if self.dry_run:
            # Simulate successful Claude API call in dry run
            return True, "Simulated Claude call (dry run)", {
                "result": f"Simulated Jekyll conversion for prompt: {prompt[:100]}..."
            }

        try:
            # Run Claude CLI with the prompt
            result = subprocess.run(
                ["claude", "-p", prompt, "--output-format", output_format],
                capture_output=True,
                text=True,
                check=True,
                timeout=self.timeout,
            )

            if output_format == "json":
                # Parse JSON output
                response_data = json.loads(result.stdout)
                return True, "Success", response_data
            else:
                return True, "Success", result.stdout

        except subprocess.TimeoutExpired:
            error_msg = f"Claude CLI timed out after {self.timeout} seconds"
            return False, error_msg, None
        except subprocess.CalledProcessError as e:
            error_msg = f"Claude CLI error (exit code {e.returncode}): {e.stderr}"
            return False, error_msg, None
        except json.JSONDecodeError as e:
            error_msg = f"Error parsing Claude response as JSON: {e}"
            return False, error_msg, None
        except Exception as e:
            error_msg = f"Unexpected error: {e}"
            return False, error_msg, None

    def convert_post(self, post: Dict) -> Tuple[bool, str, Optional[str]]:
        """
        Convert a WordPress post to Jekyll format using Claude (or simulate)

        Args:
            post: Dictionary containing post data

        Returns:
            Tuple of (success, message, jekyll_post)
        """
        # Build prompt for Claude
        prompt = self.conversion_prompt + "\n\nHere's the WordPress post:\n\n"
        prompt += f"Title: {post['title']}\n"
        prompt += f"Date: {post['date']}\n"
        prompt += f"Categories: {', '.join(post['categories'])}\n"
        prompt += f"Tags: {', '.join(post['tags'])}\n"
        prompt += f"Content:\n{post['content']}\n"
        prompt += "\nREMINDER: Return ONLY the Jekyll markdown content starting with the front matter. NO explanations, code fences, or other text."

        # Call Claude to convert the post
        success, message, response = self.run_claude(prompt)

        if not success:
            return False, message, None

        if self.dry_run:
            return True, "Simulated post conversion", response.get("result", "")

        jekyll_post = response.get("result", "")

        if not jekyll_post:
            return False, "Claude returned an empty conversion result", None

        # Strip any markdown code fences or explanatory text if present
        jekyll_post = self._clean_claude_response(jekyll_post)

        return True, "Successfully converted post", jekyll_post

    def _clean_claude_response(self, response: str) -> str:
        """
        Clean Claude's response to remove any markdown fences or explanatory text
        
        Args:
            response: Claude's response string
            
        Returns:
            Cleaned Jekyll post content
        """
        # Remove any explanations before the post content
        if "---" in response:
            front_matter_start = response.find("---")
            if front_matter_start > 0:
                response = response[front_matter_start:]
        
        # Remove markdown code fences if present
        response = re.sub(r'^```\w*\n', '', response)
        response = re.sub(r'\n```$', '', response)
        
        # Remove any explanations after the post content
        end_markers = [
            "\nHere's the Jekyll post",
            "\nI've converted",
            "\nThis Jekyll post",
            "\nThe migrated Jekyll",
        ]
        
        for marker in end_markers:
            if marker in response:
                response = response[:response.find(marker)]
        
        return response.strip()

    def quality_check(
        self, jekyll_post: str, original_year: int
    ) -> Tuple[bool, str, Optional[str]]:
        """
        Run quality checks on Jekyll post using Claude (or simulate)

        Args:
            jekyll_post: Jekyll post content
            original_year: Year the post was originally written

        Returns:
            Tuple of (success, message, improved_jekyll_post)
        """
        # If skipping quality checks
        if self.skip_quality_checks:
            if not self.dry_run:
                print("Quality checks skipped")
            return True, "Quality checks skipped", jekyll_post

        # Build prompt for Claude
        prompt = QUALITY_CHECK_PROMPT.format(
            jekyll_post=jekyll_post, original_year=original_year
        )
        prompt += "\n\nREMINDER: Return ONLY the improved Jekyll markdown content starting with the front matter. NO explanations, code fences, or other text."

        # Call Claude to check and improve the post
        success, message, response = self.run_claude(prompt)

        if not success:
            return False, message, None

        if self.dry_run:
            return True, "Simulated quality check", response.get("result", "")

        improved_jekyll_post = response.get("result", "")

        if not improved_jekyll_post:
            return False, "Claude returned an empty quality check result", None
            
        # Clean the response similarly to the conversion step
        improved_jekyll_post = self._clean_claude_response(improved_jekyll_post)

        return True, "Successfully improved post", improved_jekyll_post

    def save_jekyll_post(self, post: Dict, jekyll_content: str) -> bool:
        """
        Save Jekyll post to file (or simulate saving)

        Args:
            post: Original post data dictionary
            jekyll_content: Jekyll formatted post content

        Returns:
            Boolean indicating success
        """
        try:
            # Extract date for filename (YYYY-MM-DD format)
            date_obj = datetime.strptime(post["date"], "%Y-%m-%d %H:%M:%S %z")
            date_prefix = date_obj.strftime("%Y-%m-%d")

            # Create filename
            slug = post.get("slug") or re.sub(
                r"[^a-zA-Z0-9\-_]", "-", post["title"].lower()
            )
            filename = f"{date_prefix}-{slug}.md"
            file_path = self.output_dir / filename

            # If in dry run mode, just simulate saving
            if self.dry_run:
                print(f"[DRY RUN] Would save post to: {file_path}")
                print(f"[DRY RUN] Post title: {post['title']}")
                print(f"[DRY RUN] Content preview: {jekyll_content[:200]}...")
                return True

            # Actual file saving
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(jekyll_content)

            return True
        except Exception as e:
            print(f"Error saving file: {e}", file=sys.stderr)
            return False

    def process_posts(self):
        """Process all WordPress posts and convert them to Jekyll"""
        # Parse all posts
        all_posts = self.parse_wordpress_export()
        
        if not all_posts:
            print("No posts found matching the filter criteria.")
            return
            
        # Display posts and get user selection
        selected_posts = self.display_posts_for_selection(all_posts)
        
        if not selected_posts:
            print("No posts selected for migration.")
            return

        if self.dry_run:
            print("\n[DRY RUN MODE] Simulating WordPress to Jekyll migration")
            print(f"Would process {len(selected_posts)} selected posts")
            print(f"Input file: {self.input_file}")
            print(f"Would save to: {self.output_dir}")
            if self.prompt_file:
                print(f"Would use custom prompt from: {self.prompt_file}")
            if self.target_categories:
                print(f"Filtered for categories: {', '.join(self.target_categories)}")
            if self.target_tags:
                print(f"Filtered for tags: {', '.join(self.target_tags)}")
            print("Would simulate process for each post without actual conversion...")

        for i, post in enumerate(selected_posts, 1):
            post_id = post.get("wp_id", i)
            print(
                f"\nProcessing post {i}/{len(selected_posts)} (ID: {post_id}): {post['title']}"
            )

            # Step 1: Convert post to Jekyll format
            print("Converting to Jekyll format...")
            convert_success, convert_message, jekyll_post = self.convert_post(post)

            if not convert_success:
                print(f"ERROR: {convert_message}")
                continue

            print("Conversion successful")

            improved_jekyll_post = jekyll_post
            
            # Step 2: Run quality checks and improvements (unless skipped)
            if not self.skip_quality_checks:
                print("Running quality checks...")
                check_success, check_message, quality_checked_post = self.quality_check(
                    jekyll_post, post["year"]
                )

                if not check_success:
                    print(f"WARNING: Quality check failed - {check_message}")
                    # Use the original conversion if quality check fails
                else:
                    print("Quality checks completed successfully")
                    improved_jekyll_post = quality_checked_post
            else:
                print("Quality checks skipped")

            # Step 3: Save the final post
            save_success = self.save_jekyll_post(post, improved_jekyll_post)

            if save_success:
                print("Successfully saved Jekyll post")
            else:
                print("ERROR: Failed to save Jekyll post")

            # Rate limiting between API calls
            if i < len(selected_posts):
                print(f"Waiting {self.rate_limit} seconds before next post...")
                time.sleep(self.rate_limit)

        if not self.dry_run:
            print("\nMigration complete!")
        else:
            print("\nDry run complete. No actual conversions or files were created.")


 def main():
    """
    Main entry point for the script

    Parses the command line, validates the inputs, confirms the Claude CLI
    is callable (skipped for --dry-run), then builds a WpToJekyllConverter
    and runs the migration.

    Exits with status 1 when the input file is missing or the Claude CLI is
    unavailable, and with argparse's usual status 2 for invalid option
    values (for example a negative --rate-limit or a zero --timeout).
    """

    def non_negative_int(value: str) -> int:
        """argparse type: integer that is zero or greater."""
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}")
        if number < 0:
            raise argparse.ArgumentTypeError(f"must be 0 or greater, got {number}")
        return number

    def positive_int(value: str) -> int:
        """argparse type: integer that is one or greater."""
        number = non_negative_int(value)
        if number == 0:
            raise argparse.ArgumentTypeError("must be 1 or greater, got 0")
        return number

    def parse_name_list(raw: Optional[str]) -> Optional[Set[str]]:
        """Split a comma-separated option value, dropping empty entries."""
        if not raw:
            return None
        names = {part.strip() for part in raw.split(",") if part.strip()}
        # An empty name could never match a post, so a value made only of
        # commas/blanks is treated as "no filter".
        return names or None

    parser = argparse.ArgumentParser(
        description="Convert WordPress export to Jekyll posts using Claude"
    )
    parser.add_argument(
        "--input", "-i", required=True, help="Path to WordPress export XML file"
    )
    parser.add_argument(
        "--output", "-o", default="_posts", help="Directory to output Jekyll posts"
    )
    parser.add_argument(
        "--prompt-file", "-p", help="Path to custom migration prompt file"
    )
    parser.add_argument(
        "--rate-limit",
        "-r",
        type=non_negative_int,
        default=5,
        help="Seconds to wait between Claude API calls",
    )
    parser.add_argument(
        "--limit",
        "-l",
        type=positive_int,
        help="Maximum number of posts to display for selection",
    )
    parser.add_argument(
        "--categories",
        "-c",
        help="Comma-separated list of categories to filter posts by",
    )
    parser.add_argument(
        "--tags",
        "-t",
        help="Comma-separated list of tags to filter posts by",
    )
    parser.add_argument(
        "--timeout",
        type=positive_int,
        default=300,
        help="Timeout in seconds for Claude API calls (default: 300)",
    )
    parser.add_argument(
        "--skip-quality-checks",
        action="store_true",
        help="Skip the quality check step",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Simulate the migration process without calling Claude or saving files",
    )

    args = parser.parse_args()

    # Check if input file exists
    if not os.path.isfile(args.input):
        print(f"ERROR: Input file '{args.input}' does not exist", file=sys.stderr)
        sys.exit(1)

    # Check if prompt file exists if specified
    if args.prompt_file and not os.path.isfile(args.prompt_file):
        print(
            f"WARNING: Prompt file '{args.prompt_file}' does not exist", file=sys.stderr
        )
        print("Will use default prompt instead", file=sys.stderr)
        # Forget the path so the run neither repeats the warning nor
        # announces a custom prompt that is not actually in use.
        args.prompt_file = None

    # Process categories and tags if provided
    target_categories = parse_name_list(args.categories)
    target_tags = parse_name_list(args.tags)

    # Skip Claude CLI check if in dry run mode
    if not args.dry_run:
        # Check if Claude CLI is available
        try:
            subprocess.run(["claude", "--version"], capture_output=True, check=True)
        except subprocess.CalledProcessError:
            print(
                "ERROR: Claude CLI returned an error. Make sure it's properly installed.",
                file=sys.stderr,
            )
            sys.exit(1)
        except FileNotFoundError:
            print(
                "ERROR: Claude CLI not found. Please install Claude Code CLI.",
                file=sys.stderr,
            )
            sys.exit(1)

    # Create and run the converter
    converter = WpToJekyllConverter(
        input_file=args.input,
        output_dir=args.output,
        prompt_file=args.prompt_file,
        rate_limit=args.rate_limit,
        limit=args.limit,
        target_categories=target_categories,
        target_tags=target_tags,
        timeout=args.timeout,
        skip_quality_checks=args.skip_quality_checks,
        dry_run=args.dry_run,
    )

    if not args.dry_run:
        print("Starting WordPress to Jekyll migration process")
        print(f"Input file: {args.input}")
        print(f"Output directory: {args.output}")
        if args.prompt_file:
            print(f"Using custom prompt from: {args.prompt_file}")
        if args.limit is not None:
            print(f"Limiting to {args.limit} posts for display")
        if target_categories:
            print(f"Filtering for categories: {', '.join(target_categories)}")
        if target_tags:
            print(f"Filtering for tags: {', '.join(target_tags)}")
        print(f"Claude API timeout: {args.timeout} seconds")
        if args.skip_quality_checks:
            print("Quality checks will be skipped")

    converter.process_posts()


 # Run the command-line interface only when this file is executed as a
 # script; importing it as a module must not start a migration.
 if __name__ == "__main__":
    main()