A containerized AI Toolkit setup with MCP (Model Context Protocol) integration for training LoRA models and fine-tuning diffusion models. This provides a complete solution for training custom LoRA models with full MCP integration, allowing AI assistants to manage the entire training workflow.
See the template repository for a complete example. Also includes the ComfyUI MCP Server used for creating images/videos from the trained models.
- Fully Containerized: AI Toolkit and MCP server run in Docker containers
- NVIDIA GPU Support: Full CUDA 12.1 support for GPU-accelerated training
- Persistent Storage: All datasets, configs, and outputs are persisted via volume mounts
- MCP Integration: AI assistants can create training configs and manage training jobs via MCP tools
- HTTP MCP API: HTTP API for easy integration with remote agents
- Dataset Management: Upload and manage training datasets via MCP
- Smart Configuration: Automatic test prompt generation with trigger word integration
- Training Monitoring: Real-time training status and progress tracking
- Model Download: Download trained models directly through MCP
- Web UI: Access AI Toolkit's web interface at http://localhost:8675
- Docker and Docker Compose
- NVIDIA GPU with Docker GPU support (`nvidia-docker2`)
- CUDA-compatible GPU (CUDA 12.1)
- At least 30GB free disk space for models and datasets
- 24GB+ VRAM recommended for FLUX LoRA training (lower VRAM possible with `low_vram` mode)
- Build the Docker images:

  docker-compose build

- Start the services:

  # Start AI Toolkit and standard MCP server
  docker-compose up -d

  # Optional: Start HTTP API server for easier integration
  docker-compose up -d mcp-http-server

- Access the services:
  - AI Toolkit Web UI: http://localhost:8675
  - MCP HTTP API: http://localhost:8190 (if started)
  - MCP stdio: Available in the `mcp-ai-toolkit-trainer` container
model-trainer-mcp/
├── docker-compose.yml # Container orchestration
├── Dockerfile # Container image definition
├── mcp_server.py # MCP server implementation
├── mcp_http_server.py # HTTP API wrapper
├── requirements.txt # Python dependencies
└── example_training.py # Example usage script
Note: All files are at the root level for GitHub Gist compatibility. When deployed:
- Datasets are stored in Docker volumes at `/ai-toolkit/datasets/`
- Configs are saved to `/ai-toolkit/configs/`
- Outputs are written to `/ai-toolkit/outputs/`
- Logs are stored in `/ai-toolkit/logs/`
Create a new LoRA training configuration with customizable parameters.
Parameters:
- `name` (required): Name for the training job
- `model_name` (required): Base model name (e.g., "ostris/Flex.1-alpha", "runwayml/stable-diffusion-v1-5")
  - Note: Use publicly accessible models. `black-forest-labs/FLUX.1-dev` requires authentication
- `dataset_path` (required): Path to the dataset folder (e.g., "/ai-toolkit/datasets/my-dataset")
- `resolution`: Training resolution in pixels (default: 512)
- `batch_size`: Training batch size (default: 1)
- `learning_rate`: Learning rate (default: 0.0002)
- `steps`: Number of training steps (default: 1000)
- `rank`: LoRA rank - higher for more complex concepts (default: 16)
- `alpha`: LoRA alpha - typically same as rank (default: 16)
- `use_wandb`: Enable Weights & Biases logging (default: false)
- `low_vram`: Enable low VRAM mode for GPUs with <24GB (default: true)
- `trigger_word`: Unique trigger word for activating the LoRA
- `test_prompts`: Array of test prompts for validation (recommended: 4 prompts)
  - Include 3 similar prompts and 1 unique/creative prompt
  - All prompts must include the trigger word
  - If not provided, default prompts will be auto-generated
- `disable_sampling`: Disable sample image generation during training (default: false)
  - Useful for faster training when you don't need intermediate samples
  - Significantly reduces training time by skipping image generation
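As a quick sketch of how the defaults play out, the call below (sent via the optional HTTP wrapper described later in this README; the names are placeholders) passes only the required fields plus a trigger word and lets everything else fall back to the defaults above, including auto-generated test prompts:

```python
import requests

MCP_URL = "http://localhost:8190/mcp/tool"  # optional HTTP wrapper, if started

resp = requests.post(MCP_URL, json={
    "tool": "create-training-config",
    "arguments": {
        # Required fields only; resolution/steps/rank etc. use their defaults,
        # and test prompts are auto-generated around the trigger word.
        "name": "my-default-lora",
        "model_name": "ostris/Flex.1-alpha",
        "dataset_path": "/ai-toolkit/datasets/my-dataset",
        "trigger_word": "my_style",
    },
}, timeout=60)
resp.raise_for_status()
print(resp.json())
```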
List all available training configurations.
Retrieve a specific training configuration by name.
Get training information including trigger word and test prompts for a configuration.
Parameters:
- `name` (required): Configuration name
Upload images with captions to create a new training dataset.
Parameters:
- `dataset_name` (required): Name for the dataset
- `images` (required): Array of images with:
  - `filename`: Image filename
  - `content`: Base64-encoded image content
  - `caption`: Caption/description for the image
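A short Python sketch of assembling this payload from a local folder of images and captions and sending it through the optional HTTP wrapper (the `requests` usage and helper name are illustrative; the `POST /mcp/tool` endpoint and argument shape follow the HTTP API examples later in this README):

```python
import base64
from pathlib import Path

import requests

MCP_URL = "http://localhost:8190/mcp/tool"  # optional HTTP wrapper, if started

def upload_dataset(dataset_name: str, image_dir: str) -> dict:
    """Base64-encode every image in image_dir and pair it with its sibling .txt caption."""
    images = []
    for img in sorted(Path(image_dir).iterdir()):
        if img.suffix.lower() not in {".jpg", ".jpeg", ".png"}:
            continue
        caption_file = img.with_suffix(".txt")
        images.append({
            "filename": img.name,
            "content": base64.b64encode(img.read_bytes()).decode("ascii"),
            "caption": caption_file.read_text().strip() if caption_file.exists() else "",
        })
    payload = {"tool": "upload-dataset",
               "arguments": {"dataset_name": dataset_name, "images": images}}
    resp = requests.post(MCP_URL, json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()

# Example: upload_dataset("my-style-dataset", "./my_images")
```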
List available datasets with image counts.
Start a training job using a saved configuration.
Parameters:
- `config_name` (required): Name of the configuration to use
Get the current status of a training job.
Parameters:
- `job_id` (required): Training job ID
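For the monitoring workflow, a simple polling loop against this tool via the optional HTTP wrapper could look like the sketch below; the exact status fields and the "running" marker are assumptions, so adjust them to the real `get-training-status` response:

```python
import time

import requests

MCP_URL = "http://localhost:8190/mcp/tool"  # optional HTTP wrapper, if started

def wait_for_training(job_id: str, poll_seconds: int = 60):
    """Poll get-training-status until the job no longer reports as running."""
    while True:
        resp = requests.post(MCP_URL, json={
            "tool": "get-training-status",
            "arguments": {"job_id": job_id},
        }, timeout=30)
        resp.raise_for_status()
        status = resp.json()
        print(status)  # exact response shape depends on the MCP server
        # Treating a "running" marker as in-progress is an assumption; adjust as needed
        if "running" not in str(status).lower():
            return status
        time.sleep(poll_seconds)
```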
Stop a running training job.
Parameters:
- `job_id` (required): Training job ID to stop
List all training jobs and their statuses.
Export a trained model in the specified format.
Parameters:
- `job_id` (required): Training job ID
- `format`: Export format ("safetensors" or "ckpt", default: "safetensors")
List all trained models available in the outputs directory.
Download a trained LoRA model as base64-encoded content.
Parameters:
- `model_path` (required): Path to the model file (relative to outputs directory)
- `include_metadata`: Include training metadata if available (default: true)
Returns:
- Base64-encoded model content
- Model metadata (if available)
- Training configuration (if available)
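Mirroring the `jq` one-liner shown later in the download example, a Python client might decode the returned content roughly like this (a sketch; it assumes the same `result`/`content` layout that the `jq` command reads):

```python
import base64
import json

import requests

MCP_URL = "http://localhost:8190/mcp/tool"  # optional HTTP wrapper, if started

resp = requests.post(MCP_URL, json={
    "tool": "download-model",
    "arguments": {"model_path": "my-flux-lora/checkpoint-1000.safetensors"},
}, timeout=600)
resp.raise_for_status()

# .result holds a JSON string whose "content" field is the base64-encoded model
result = json.loads(resp.json()["result"])
with open("my_lora.safetensors", "wb") as f:
    f.write(base64.b64decode(result["content"]))
```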
Get AI Toolkit system statistics including GPU usage.
Retrieve logs for a specific training job.
Parameters:
- `job_id` (required): Training job ID
- `lines`: Number of log lines to retrieve (default: 100)
The MCP server can optionally be accessed via HTTP for easier integration.
docker-compose up -d mcp-http-server

- `GET /` - API documentation
- `GET /health` - Health check
- `POST /mcp/tool` - Execute any MCP tool
- `GET /mcp/tools` - List available tools
- `GET /datasets` - List datasets
- `GET /configs` - List training configurations
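The GET endpoints are handy for a quick sanity check before invoking any tools; a minimal Python sketch (assuming the wrapper is reachable on its default port 8190):

```python
import requests

BASE = "http://localhost:8190"  # MCP HTTP wrapper

# Confirm the wrapper is reachable, then list the MCP tools it exposes
health = requests.get(f"{BASE}/health", timeout=10)
print(health.status_code, health.text)

tools = requests.get(f"{BASE}/mcp/tools", timeout=10)
print(tools.text)
```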
curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "create-training-config",
"arguments": {
"name": "my-flux-lora",
"model_name": "ostris/Flex.1-alpha",
"dataset_path": "/ai-toolkit/datasets/my-dataset",
"steps": 2000,
"rank": 32,
"trigger_word": "my_style",
"test_prompts": [
"a photo of my_style artwork",
"a detailed image of my_style",
"a high quality picture of my_style",
"my_style in a cyberpunk cityscape with neon lights"
]
}
}'

curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "get-training-info",
"arguments": {
"name": "my-flux-lora"
}
}'

The upload-dataset tool automatically generates .aitk_size.json metadata files required by AI Toolkit for proper dataset recognition.
curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "upload-dataset",
"arguments": {
"dataset_name": "my-style-dataset",
"images": [
{
"filename": "image1.jpg",
"content": "base64_encoded_content_here",
"caption": "a photo of my_style artwork"
}
]
}
}'

curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "start-training",
"arguments": {
"config_name": "my-flux-lora"
}
}'

# List available models first
curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "list-exported-models",
"arguments": {}
}'
# Download a specific model
curl -X POST http://localhost:8190/mcp/tool \
-H "Content-Type: application/json" \
-d '{
"tool": "download-model",
"arguments": {
"model_path": "my-flux-lora/checkpoint-1000.safetensors"
}
}' > model_response.json
# Extract the base64 content and decode it
jq -r '.result | fromjson | .content' model_response.json | base64 -d > my_lora.safetensors

For best results with LoRA training:
- Image Requirements:
- PNG or JPG format
- Consistent aspect ratio (square images work best for FLUX)
- High quality, diverse examples showing different angles/contexts
- 10-50 images typically sufficient for style LoRAs
- 20-100 images for character/object LoRAs
- Caption Format:
  - Each image needs a corresponding .txt file with the same name (a small caption-file helper is sketched after this list)
  - Include trigger words consistently in captions
  - Be descriptive but concise
  - Vary the descriptions while keeping the trigger word
  - Example: "a photo of my_style artwork, digital painting, vibrant colors"
- Dataset Structure:

  datasets/my-dataset/
  ├── image1.jpg
  ├── image1.txt
  ├── image2.jpg
  ├── image2.txt
  └── ...

- Caption Best Practices:
- Place trigger word at the beginning: "my_style portrait of a woman"
- Add variety: backgrounds, lighting, poses, contexts
- Include style descriptors: "my_style, oil painting style, dramatic lighting"
- Avoid repetitive captions - each should be unique
- Flux Model Caption Guidelines:
For Flux models specifically, ensure your dataset image captions cover these essential elements:
- Trigger Word: Always include your unique trigger word (e.g., "my_style", "xyz_character")
- Subject: Clearly describe what's in the photo (e.g., "a woman", "a landscape", "a robot")
- Angle/Perspective: Specify the camera angle or viewpoint (e.g., "front view", "aerial shot", "close-up portrait", "three-quarter view")
- Environment/Setting: Describe where the subject is located (e.g., "in a forest", "urban street", "studio background", "underwater")
- Lighting: Include lighting conditions (e.g., "soft natural light", "dramatic rim lighting", "golden hour", "neon illumination")
Example Caption Structure:
"my_style portrait of a woman, three-quarter view, in a modern office, soft window lighting" "my_style robot, full body shot from below, in a cyberpunk cityscape, neon purple lighting" "my_style landscape, wide aerial view, mountain forest environment, sunset golden hour lighting"This comprehensive captioning helps Flux models better understand and reproduce your style across different contexts and conditions.
See example_training.py for a complete example of using the MCP HTTP API to:
- Create training configurations
- Upload datasets
- Start and monitor training
- Download trained models
Run the example:
python example_training.py

AI Toolkit supports training LoRAs for:
- FLUX/Flex Models: Latest state-of-the-art models
  - `ostris/Flex.1-alpha` (recommended - publicly accessible)
  - `ostris/Flux.1-dev` (if available)
  - Note: `black-forest-labs/FLUX.1-dev` requires authentication
- Stable Diffusion Models:
  - `runwayml/stable-diffusion-v1-5` (widely compatible)
  - `stabilityai/stable-diffusion-2-1`
  - `CompVis/stable-diffusion-v1-4`
- SDXL Models and other diffusion models supported by AI Toolkit
- `LOG_LEVEL`: Set logging level (default: INFO)
- `AI_TOOLKIT_SERVER_URL`: Override AI Toolkit server URL
- `MCP_HTTP_PORT`: HTTP API port (default: 8190)
The setup uses all available NVIDIA GPUs by default. To limit GPU usage, modify NVIDIA_VISIBLE_DEVICES in docker-compose.yml.
- Low VRAM Mode: Enable `low_vram: true` in configurations for GPUs with <24GB VRAM
- Learning Rate:
- Default is now 2e-4 (0.0002) for better training performance
- Use 1e-4 (0.0001) for more conservative training
- Use 5e-5 (0.00005) for fine-tuning existing styles
- Increase to 3e-4 (0.0003) for stubborn concepts
- Steps:
- Formula: 100 × number of images in dataset
- Example: 20 images = 2000 steps, 30 images = 3000 steps
- For single image: 100 steps is often sufficient
- Monitor test images - stop early if overfitting occurs
- Higher step counts may lead to overfitting on small datasets
- Rank Selection:
- 8-16: Simple styles or minor adjustments
- 16-32: Standard character/style LoRAs
- 32-64: Complex concepts or multiple subjects
- Test Prompts:
- Always include 4 test prompts
- 3 should be variations of your training data
- 1 should test generalization (unique scenario)
- Trigger Words:
- Use unique, non-dictionary words
- Avoid common words that might conflict
- Examples: "xyz_style", "abc_character", "def_object"
- Sample Generation:
- Enable by default to monitor training progress
- Disable with `disable_sampling: true` for faster training
- Disabling saves ~20-30% training time on FLUX models
# Build all services
docker-compose build

The build script automatically detects Linux systems and uses host network mode for better DNS resolution during the build process, which helps avoid network-related build failures.
- Check NVIDIA Docker runtime: `docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi`
- Ensure ports 8675/8190 are not already in use
- Verify Docker has GPU access: `docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi`
- Check for container name conflicts with other MCP services (all container names should be unique)
- Check dataset path exists and contains images with captions
- Verify GPU has sufficient VRAM (use `nvidia-smi` to check)
- Ensure all images have corresponding .txt caption files
- Check logs: `docker-compose logs mcp-ai-toolkit-trainer`
- If you see "Cannot read properties of undefined (reading 'process')", the job config format is incorrect
- If you see model not found errors, ensure you're using accessible models like `ostris/Flex.1-alpha`
- Dataset must contain a .aitk_size.json metadata file (automatically generated by the upload-dataset tool)
- The metadata file contains image dimensions and file signatures in AI Toolkit's specific format
- For manual uploads, you'll need to generate this file with the correct format:
{ "__version__": "0.1.2", "\\image1.jpg": [width, height, "filesize:hash"] }
- Check logs: `docker-compose logs mcp-ai-toolkit-trainer`
- Ensure AI Toolkit is healthy: `docker-compose ps`
- Verify MCP server is running: `docker exec mcp-ai-toolkit-trainer ps aux | grep mcp`
- Ensure mcp-http-server is running: `docker-compose ps mcp-http-server`
- Check logs: `docker-compose logs mcp-http-server`
- Verify port 8190 is accessible: `curl http://localhost:8190/health`
- Enable `low_vram: true` in your configuration
- Reduce batch size to 1
- Lower resolution to 512x512
- Reduce rank to 8 or 16
- GPU Memory: FLUX LoRA training requires ~20-24GB VRAM
- Training Time:
- SD 1.5: ~1.5-2 iter/sec on RTX 4090 (100 steps ≈ 1-2 minutes)
- FLUX/Flex: ~0.3-0.5 iter/sec on RTX 4090 (100 steps ≈ 5-10 minutes)
- 1000 steps: 30-60 minutes on RTX 4090
- 3000 steps: 90-180 minutes on RTX 4090
- Disk Space: Each training run can use 5-10GB for checkpoints
If deploying on a remote server:
- Update MCP configuration: When containers run on a remote server, update your MCP client configuration to point to the correct host:

  {
    "mcpServers": {
      "ai-toolkit": {
        "command": "docker",
        "args": ["exec", "-i", "mcp-ai-toolkit-trainer", "python", "/app/mcp_server.py"],
        "env": {
          "AI_TOOLKIT_SERVER_URL": "http://YOUR_REMOTE_HOST:8675"
        }
      }
    }
  }

- Access services remotely:
  - AI Toolkit Web UI: http://YOUR_REMOTE_HOST:8675
  - MCP HTTP API: http://YOUR_REMOTE_HOST:8190
- Monitor training: Use the Web UI or MCP tools to monitor training progress remotely
# Stop all services
docker-compose down
# Stop and remove volumes (WARNING: deletes all datasets and outputs)
docker-compose down -v

- Config Storage: Configurations created via MCP are stored in the database and are NOT visible as files in the Web UI's config browser
- Model Access: Some models like `black-forest-labs/FLUX.1-dev` require authentication; use publicly accessible alternatives like `ostris/Flex.1-alpha`
- Dataset Path: When using `create-training-config`, the dataset_path should be just the dataset name (e.g., "my-dataset"), not the full path
All MCP tools are fully functional with the AI Toolkit Web UI. The MCP server integrates with AI Toolkit's database and API endpoints.
Fully functional tools:
- ✅ Configuration management (create, list, get)
- ✅ Dataset upload and management (with automatic .aitk_size.json generation)
- ✅ Training job control (start, stop, status)
- ✅ Model listing and downloading
- ✅ Real-time training monitoring
- ✅ Training logs retrieval
Important Notes:
- Configurations created via MCP are stored in AI Toolkit's SQLite database
- These configs are NOT visible as files in the Web UI's config browser
- The Web UI and filesystem configs are separate systems
- Jobs created via MCP's `start-training` tool ARE visible in the Web UI
- The Web UI expects configs in this exact format:

  {
    "job": "extension",
    "config": {
      "name": "job_name",
      "process": [{
        "type": "ui_trainer",
        // ... rest of config
      }]
    },
    "meta": { "name": "[name]", "version": "1.0" }
  }

- Database initialization is handled automatically during container build
- AI Toolkit uses Prisma ORM with a SQLite database (`aitk_db.db`)
- Jobs are stored with `job_config` as a stringified JSON object
- The database is initialized during container build with `npm run update_db`
The MCP server generates configurations that match the Web UI's expected format:
- Job type must be `"extension"` (not `"train"` or other values)
- Trainer type must be `"ui_trainer"` (not `"sd_trainer"`)
- Config is wrapped in a `{job, config, meta}` structure
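Putting the notes above together, a sketch of building that wrapper in Python and stringifying it the way `job_config` is stored (the inner trainer options are placeholders, not the full config the MCP server actually writes):

```python
import json

def build_job_config(name: str, trainer_options: dict) -> str:
    """Wrap a trainer config in the {job, config, meta} structure the Web UI expects
    and return it as a stringified JSON object, as stored in job_config."""
    wrapper = {
        "job": "extension",              # must be "extension", not "train"
        "config": {
            "name": name,
            "process": [{
                "type": "ui_trainer",    # must be "ui_trainer", not "sd_trainer"
                **trainer_options,       # placeholder for the rest of the config
            }],
        },
        "meta": {"name": "[name]", "version": "1.0"},
    }
    return json.dumps(wrapper)

# Example: build_job_config("my-flux-lora", {"trigger_word": "my_style"})
```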
The .aitk_size.json file uses a specific format:
- File paths use a backslash prefix: `"\\image.jpg"`
- Signature format: `"filesize:hash"`, where hash is the first 8 chars of the MD5 as decimal
- Only the first 1024 bytes are used for hash calculation
- AI Toolkit runs on port 8675 (Web UI)
- MCP server runs inside the ai-toolkit container
- Optional HTTP wrapper runs on port 8190
- All containers share volumes for datasets, configs, and outputs
