<docs>
<title>Claudette: A High-Level Claude Interface</title>
<design_philosophy>
<explanation>
Core design principles that inform all features:
1. State Management:
- Chat maintains its own context
- Tools preserve their state
- Caching optimizes repeated content
2. Feature Integration:
- Tools can process any input
- Images can use all features
- Structure works everywhere
3. Type Safety:
- Tool inputs are validated
- Outputs are structured
- Errors are descriptive
</explanation>
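<example>
A minimal sketch of these principles working together (illustrative values only; Chat, tools, and caching are each documented in the sections below):
<code language="python">
from claudette import Chat

def lookup(
    key: str  # Key to look up
) -> str:     # Stored value, or 'unknown'
    """Looks up a value by key."""
    return {"colour": "blue"}.get(key, "unknown")

# State: the Chat keeps its own history; tool inputs are validated from type hints
chat = Chat('claude-3-5-sonnet-20241022', sp="Be concise.", tools=[lookup])
chat("My key is 'colour'.")
chat("Look it up for me.")
</code>
</example>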
</design_philosophy>
<feature_relationships>
<mermaid>
graph TD
A[Chat] --> B[Tools]
A --> C[Images]
A --> D[Structure]
E[Caching] --> B
E --> C
E --> D
B --> F[Complex Operations]
C --> F
D --> F
style A fill:#e1f5fe
style E fill:#e8f5e9
style F fill:#fff3e0
</mermaid>
<key_concepts>
<concept>Chat is the foundation - everything builds on it</concept>
<concept>Caching optimizes all other features</concept>
<concept>Features combine for complex operations</concept>
</key_concepts>
</feature_relationships>
<quick_setup>
<code language="python">
from claudette import *

# Available models
models = [
    'claude-3-opus-20240229',
    'claude-3-5-sonnet-20241022',
    'claude-3-haiku-20240307'
]

# We'll use Sonnet for examples
model = models[1]

# Basic chat setup
chat = Chat(model, sp="You are a helpful assistant.")
</code>
</quick_setup>
<section id="chat"> | |
<title>Chat Interface</title> | |
<core_concepts> | |
<mermaid> | |
graph TD | |
A[Chat Instance] --> B[Message History] | |
A --> C[System Prompt] | |
B --> D[Regular Messages] | |
B --> E[Tool Calls] | |
B --> F[Image Messages] | |
C --> G[Response Generation] | |
D --> G | |
E --> G | |
F --> G | |
G --> H[Streaming] | |
G --> I[Full Response] | |
style A fill:#e1f5fe | |
style G fill:#e8f5e9 | |
style H,I fill:#fff3e0 | |
</mermaid> | |
<explanation> | |
The Chat class provides a stateful interface that: | |
1. Message Management: | |
- Automatic history tracking | |
- Context window optimization | |
- System prompt persistence | |
2. Response Handling: | |
- Streaming support | |
- Prefill capability | |
- Rich response metadata | |
3. Feature Integration: | |
- Tool calling system | |
- Image processing | |
- Structured output | |
- Prompt caching | |
</explanation> | |
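<example>
A quick way to observe this statefulness (a sketch: claudette exposes the running history as chat.h and cumulative usage as chat.use; treat both attribute names as assumptions if your version differs):
<code language="python">
chat = Chat(model, sp="You are a helpful assistant.")
chat("I'm Jeremy")
chat("What's my name?")

# History accumulates user/assistant turns automatically (assumed attribute: chat.h)
print(len(chat.h))

# Cumulative token usage across the whole conversation (assumed attribute: chat.use)
print(chat.use)
</code>
</example>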
</core_concepts>
<patterns>
<pattern id="basic_chat">
<title>Basic Chat Pattern</title>
<code language="python">
# Initialize with system prompt
chat = Chat(model, sp="You are a helpful and concise assistant.")

# Simple conversation
response = chat("I'm Jeremy")
print(response)
response = chat("What's my name?")
print(response)

# Access full response details
print(f"Model: {response.model}")
print(f"Role: {response.role}")
print(f"Stop reason: {response.stop_reason}")
print(f"Usage: {response.usage}")
</code>
<output>
Hello Jeremy, nice to meet you.
Your name is Jeremy.
Model: claude-3-5-sonnet-20241022
Role: assistant
Stop reason: end_turn
Usage: In: 54; Out: 8; Total: 62
</output>
</pattern>
<pattern id="prefill">
<title>Response Control Pattern</title>
<explanation>
Prefill provides fine-grained control over responses:
- Guide initial response words
- Ensure consistent formatting
- Maintain conversation flow
</explanation>
<code language="python">
# Format control
response = chat("What's the capital of France?", prefill="The capital of France is")
print(response)

# Style control
response = chat("Explain quantum physics", prefill="In simple terms,")
print(response)

# List format
response = chat("Name three fruits", prefill="The fruits are:")
print(response)
</code>
<output>
The capital of France is Paris.
In simple terms, quantum physics describes how matter and energy behave at the smallest scales.
The fruits are: apple, banana, and orange.
</output>
</pattern>
<pattern id="streaming"> | |
<title>Streaming Pattern</title> | |
<explanation> | |
Streaming provides immediate feedback and progress indication: | |
- Real-time response generation | |
- Early error detection | |
- Better UX for long responses | |
</explanation> | |
<code language="python"> | |
# Basic streaming | |
for chunk in chat( | |
"Write a haiku", stream=True | |
): | |
print(chunk, end='') | |
# Streaming with prefill | |
for chunk in chat( | |
"List colors", prefill="Colors:", stream=True | |
): | |
print(chunk, end='') | |
# Stream with metadata | |
response = chat( | |
"Long response", stream=True, return_full=True # Get full response object | |
) | |
for chunk in response: | |
print(chunk, end='') | |
print(f"\nTotal tokens: {response.usage.total_tokens}") | |
</code> | |
</pattern> | |
<pattern id="error_handling"> | |
<title>Error Handling Pattern</title> | |
<code language="python"> | |
try: | |
response = chat( | |
"Complex query with context", max_tokens=100 # Limit response length | |
) | |
except TokenLimitError as e: | |
print(f"Token limit exceeded: {e.usage}") | |
except APIError as e: | |
print(f"API error: {e.status_code}") | |
except NetworkError as e: | |
print(f"Network error: {e}") | |
</code> | |
</pattern> | |
</patterns>
<integration_points>
<title>Feature Integration</title>
<explanation>
Chat seamlessly integrates with all Claudette features:
1. Tool Integration:
- Automatic tool calling
- Result handling
- State preservation
2. Image Support:
- Multiple image handling
- Image analysis
- Context maintenance
3. Structured Output:
- Pydantic model support
- Schema validation
- Type safety
4. Prompt Caching:
- Automatic cache management
- Token optimization
- Context reuse
</explanation>
<code language="python">
# Tool integration
chat = Chat(model, tools=[calculator])

# Image support
chat([image_bytes, "Describe this image"])

# Structured output
chat.struct(WeatherReport, "What's the weather?")

# Prompt caching
chat(cached_message, cache=True)
</code>
</integration_points>
<best_practices>
<list>
- Use consistent system prompts
- Handle errors appropriately
- Monitor token usage (see the combined sketch below)
- Consider streaming for long responses
- Use prefill for consistent formats
- Leverage integration features
</list>
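<example>
A sketch combining several of these practices, using only the interfaces and error types shown in the patterns above:
<code language="python">
chat = Chat(model, sp="You are a helpful and concise assistant.")
try:
    # Stream long responses; prefill keeps the output format consistent
    for chunk in chat("Summarize our discussion", prefill="Summary:", stream=True):
        print(chunk, end='')
except TokenLimitError as e:
    # Monitor token usage and react when limits are hit
    print(f"Token limit exceeded: {e.usage}")
</code>
</example>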
</best_practices>
<section id="async_support">
<title>Async Support</title>
<explanation>
Async support is fundamental to Claudette's design:
1. Performance Benefits:
- Non-blocking operations
- Efficient resource usage
- Better scalability
2. Integration Patterns:
- Works with async web frameworks
- Supports async tool calls
- Enables parallel operations
3. Developer Experience:
- Consistent with sync interface
- Natural error handling
- Simple conversion from sync code
</explanation>
<patterns>
<pattern id="basic_async">
<title>Basic Async Usage</title>
<code language="python">
from claudette import AsyncChat, AsyncClient

async def basic_example():
    chat = AsyncChat(model, sp="You are a helpful assistant.")

    # Single response
    response = await chat("Hello!")
    print(response)

    # Multiple turns
    await chat("I'm learning async programming")
    response = await chat("What am I learning about?")
    print(response)
</code>
<output>
Hello! How can I help you today?
You're learning about async programming.
</output>
</pattern>
<pattern id="async_streaming">
<title>Async Streaming</title>
<code language="python">
async def stream_example():
    chat = AsyncChat(model)

    # Basic streaming
    async for chunk in await chat("Count to 5", stream=True):
        print(chunk, end='')

    # Streaming with prefill
    async for chunk in await chat("List three colors", prefill="The colors are:", stream=True):
        print(chunk, end='')
</code>
<output>
1... 2... 3... 4... 5!
The colors are: red, blue, and green.
</output>
</pattern>
<pattern id="async_tools"> | |
<title>Async Tool Integration</title> | |
<code language="python"> | |
async def fetch_data(url: str) -> dict: | |
"""Async fetch from API""" | |
async with aiohttp.ClientSession() as session: | |
async with session.get(url) as response: | |
return await response.json() | |
async def process_example(): | |
chat = AsyncChat( | |
model, tools=[fetch_data] | |
) | |
result = await chat.toolloop( | |
"Get weather data from api.weather.com", stream=True | |
) | |
print(result) | |
</code> | |
</pattern> | |
</patterns> | |
<technical_notes> | |
<list> | |
- AsyncChat maintains same interface as Chat | |
- All features (tools, images, caching) work with async | |
- Use 'async for' with streaming responses | |
- Async tools must be coroutines | |
- Error handling follows standard async patterns | |
</list> | |
</technical_notes> | |
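<example>
A minimal sketch of the sync-to-async conversion noted above, using the standard asyncio entry point (nothing Claudette-specific is assumed beyond AsyncChat):
<code language="python">
import asyncio
from claudette import AsyncChat

async def main():
    chat = AsyncChat(model, sp="You are a helpful assistant.")
    return await chat("Hello!")

# Drive the async chat from ordinary sync code
print(asyncio.run(main()))
</code>
</example>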
<common_patterns>
<title>Common Integration Patterns</title>
<code language="python">
import asyncio
from fastapi import FastAPI

# FastAPI Integration
app = FastAPI()

@app.post("/chat")
async def chat_endpoint(message: str):
    chat = AsyncChat(model)
    response = await chat(message)
    return {"response": response}

# Parallel Operations
async def parallel_example():
    chat1 = AsyncChat(model)
    chat2 = AsyncChat(model)
    results = await asyncio.gather(
        chat1("Hello"),
        chat2("Hi there")
    )
    return results
</code>
</common_patterns>
<error_handling>
<title>Async Error Handling</title>
<code language="python">
async def safe_chat():
    try:
        chat = AsyncChat(model)
        async for chunk in await chat("Complex query", stream=True):
            print(chunk, end='')
    except ConnectionError as e:
        print(f"Connection failed: {e}")
    except TokenLimitError as e:
        print(f"Token limit exceeded: {e.usage}")
    except AsyncToolError as e:
        print(f"Tool error: {e.tool_name}")
</code>
</error_handling>
</section>
</section>
<section id="tool_use"> | |
<title>Tool Use System</title> | |
<core_concepts> | |
<mermaid> | |
graph TD | |
A[Tool Definition] --> B[Chat Integration] | |
B --> C[Execution Flow] | |
C --> D[Response Handling] | |
B --> E[Single Tool] | |
B --> F[Tool Chain] | |
style A fill:#e1f5fe | |
style B fill:#e8f5e9 | |
style C fill:#fff3e0 | |
style D fill:#f3e5f5 | |
</mermaid> | |
<explanation> | |
Claudette's tool system transforms function definitions into Claude-accessible tools: | |
1. Definition Layer: | |
- Python functions become tools | |
- Type hints provide structure | |
- Docstrings give context | |
2. Integration Layer: | |
- Automatic tool registration | |
- Natural conversation flow | |
- State preservation | |
3. Execution Layer: | |
- Automatic parameter handling | |
- Error management | |
- Result formatting | |
</explanation> | |
</core_concepts> | |
<patterns>
<pattern id="basic_tool">
<title>Single Tool Pattern</title>
<explanation>
The fundamental pattern for tool integration showing:
- Function definition with type hints
- Tool registration
- Usage in conversation
</explanation>
<code language="python">
def calculator(
    a: int,          # First number
    b: int,          # Second number
    op: str = "add"  # Operation to perform
) -> int:            # Result of calculation
    """Performs basic math operations.

    Args:
        a: First operand
        b: Second operand
        op: Operation ('add' or 'multiply')

    Returns:
        Result of the operation
    """
    print(f"Calculating {a} {op} {b}")
    if op == "add":
        return a + b
    elif op == "multiply":
        return a * b
    raise ValueError(f"Unknown operation: {op}")

# Create chat with tool
chat = Chat(model, sp="Be direct in responses.", tools=[calculator])

# Use tool in conversation
response = chat("What's 123 plus 456?", tool_choice='calculator')  # Force tool usage
</code>
<output>
Calculating 123 add 456
The sum is 579.
</output>
</pattern>
<pattern id="tool_chain"> | |
<title>Tool Chain Pattern</title> | |
<explanation> | |
Complex operations often require multiple tool calls: | |
- Tools can be called sequentially | |
- Results flow between tools | |
- Chat maintains reasoning chain | |
</explanation> | |
<code language="python"> | |
def fetch_data(url: str) -> dict: | |
"""Fetches JSON data from URL.""" | |
return requests.get(url).json() | |
def analyze_data( | |
data: dict, | |
metric: str | |
) -> float: | |
"""Analyzes specific metric in data.""" | |
return data.get(metric, 0.0) | |
chat = Chat( | |
model, tools=[fetch_data, analyze_data] | |
) | |
# Tool chain execution | |
def trace(msg): print(f"TRACE: {msg}") | |
result = chat.toolloop( | |
"Get the temperature from weather API and analyze it", trace_func=trace # See execution flow | |
) | |
</code> | |
<output> | |
TRACE: Fetching data from API... | |
TRACE: Analyzing temperature metric... | |
The current temperature is 22.5°C. | |
</output> | |
</pattern> | |
<pattern id="state_flow_example"> | |
<title>Complete Tool Flow</title> | |
<explanation> | |
The tool system flows through distinct states, each handling a specific aspect: | |
1. Definition & Registration: | |
- Functions become tools through type hints and docstrings | |
- Tools are registered with Chat instance | |
- System builds parameter schemas | |
2. Execution Flow: | |
- Parameters are extracted from natural language | |
- Validation ensures type safety | |
- Execution handles both success and failure paths | |
3. State Management: | |
- Results are processed into chat context | |
- History maintains tool execution records | |
- Errors are handled gracefully with recovery options | |
This pattern shows a complete flow from definition to execution, | |
with tracing to observe each state transition. | |
</explanation> | |
<code language="python"> | |
# Definition State | |
def analyze_data( | |
data: dict, # Input data structure | |
metrics: list[str] = ["mean"] # Metrics to calculate | |
) -> dict: # Analysis results | |
"""Analyzes data using specified metrics. | |
Each metric will be calculated and returned | |
with its confidence score. | |
""" | |
return { | |
"results": {m: calculate(data, m) for m in metrics}, | |
"confidence": 0.95 | |
} | |
# Registration & Setup | |
chat = Chat( | |
model, tools=[analyze_data], tool_choice='analyze_data' # Force usage | |
) | |
# Execution Flow with Error Handling | |
try: | |
result = chat.toolloop( | |
"Analyze this data for mean and variance", trace_func=lambda msg: print(f"TRACE: {msg}") | |
) | |
except ToolExecutionError as e: | |
print(f"Tool failed: {e.tool_name}") | |
print(f"At stage: {e.stage}") # Shows where in flow it failed | |
except ValidationError as e: | |
print(f"Invalid parameters: {e.errors()}") | |
</code> | |
<output> | |
TRACE: Preparing tool call... | |
TRACE: Validating parameters... | |
TRACE: Executing analyze_data... | |
TRACE: Processing results... | |
TRACE: Updating chat context... | |
</output> | |
</pattern> | |
<pattern id="error_handling"> | |
<title>Error Handling Pattern</title> | |
<code language="python"> | |
try: | |
result = chat.toolloop( | |
"Complex calculation", max_tools=5 # Limit tool calls | |
) | |
except ToolExecutionError as e: | |
print(f"Tool failed: {e.tool_name}") | |
print(f"Args: {e.args}") | |
print(f"Error: {e.original_error}") | |
except TooManyToolsError as e: | |
print(f"Exceeded tool limit: {e.count}") | |
</code> | |
</pattern> | |
</patterns> | |
<integration_points>
<title>Feature Integration</title>
<explanation>
Tools seamlessly integrate with other Claudette features:
1. Image Processing:
- Tools can receive image data
- Process images in conversation
- Return image analysis
2. Structured Output:
- Tools can return complex objects
- Results automatically formatted
- Type safety maintained
3. Streaming:
- See tool execution in real-time
- Track progress of long operations
- Handle partial results
</explanation>
<code language="python">
from pydantic import BaseModel

class WeatherReport(BaseModel):
    temp: float
    conditions: str

# Image processing tool
def analyze_image(
    image: bytes,
    mode: str = "colors"
) -> dict:
    """Analyzes image content."""
    return {"colors": ["red", "blue"]}

# Structured output tool
def get_weather(location: str) -> WeatherReport:
    """Returns weather data as structured object."""
    return WeatherReport(temp=22.5, conditions="sunny")

chat = Chat(model, tools=[analyze_image, get_weather])
</code>
</integration_points>
<best_practices>
<list>
- Use clear type hints and docstrings
- Break complex operations into simple tools
- Handle errors at appropriate levels
- Use trace_func for debugging
- Consider tool call limits
- Maintain tool function purity (see the sketch below)
</list>
</best_practices>
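<example>
To make the purity practice concrete, a sketch contrasting a pure tool with an impure one (both functions are illustrative, not part of Claudette):
<code language="python">
# Pure: the result depends only on the arguments, so retries are safe
def word_count(
    text: str  # Text to measure
) -> int:      # Number of whitespace-separated words
    """Counts words in the given text."""
    return len(text.split())

# Impure: hidden shared state makes retried or repeated calls unpredictable
history = []
def word_count_impure(text: str) -> int:
    history.append(text)  # Side effect outside the function's arguments
    return len(text.split())
</code>
</example>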
</section>
<section id="prompt_caching">
<title>Prompt Caching System</title>
<core_concepts>
<mermaid>
graph TD
A[Cache Creation] --> B[Token Storage]
B --> C[Cache Retrieval]
D[Long Content] --> A
E[Image Data] --> A
F[Tool Results] --> A
B --> G[Token Optimization]
C --> G
G --> H[Reduced API Costs]
G --> I[Faster Responses]
style A fill:#e1f5fe
style B fill:#e8f5e9
style C fill:#e8f5e9
style G fill:#fff3e0
</mermaid>
<explanation>
The caching system optimizes token usage and performance through:
1. Cache Management:
- Automatic token counting
- Efficient storage
- Smart retrieval
2. Performance Benefits:
- Reduced API costs
- Faster responses
- Lower memory usage
3. Integration Features:
- Works with all content types
- Preserves context
- Maintains conversation flow
</explanation>
</core_concepts>
<patterns>
<pattern id="basic_caching">
<title>Basic Caching Pattern</title>
<explanation>
Cache long or frequently used content to optimize token usage
</explanation>
<code language="python">
# Create message with caching enabled
long_document = """
[Very long text content...]
"""
cached_msg = mk_msg(long_document, cache=True)
chat = Chat(model)

# First use creates cache
response = chat(cached_msg)
print(f"Initial usage: {response.usage}")

# Subsequent uses read from cache
response = chat("Summarize the document again")
print(f"Cached usage: {response.usage}")
</code>
<output>
Initial usage: In: 54; Out: 8; Cache create: 5000; Cache read: 0; Total: 5062
Cached usage: In: 25; Out: 12; Cache create: 0; Cache read: 5000; Total: 5037
</output>
</pattern>
<pattern id="cache_with_tools">
<title>Tool Results Caching</title>
<explanation>
Cache tool results for expensive operations
</explanation>
<code language="python">
def expensive_calculation(data: dict) -> dict:
    """Performs complex calculation."""
    return {"result": "complex_output"}

chat = Chat(model, tools=[expensive_calculation])

# Cache the tool result
result = chat.toolloop("Perform calculation", cache=True)  # Cache tool output

# Subsequent queries use cached result
follow_up = chat("Explain the calculation again", cache=True)
</code>
</pattern>
<pattern id="selective_caching">
<title>Selective Caching Pattern</title>
<explanation>
Cache specific parts of complex interactions
</explanation>
<code language="python">
# Cache creation with specific content
def create_cached_content(text: str, name: str):
    """Creates cached content with identifier."""
    return mk_msg(text, cache=True, cache_key=name)  # Optional identifier

# Create multiple caches
background = create_cached_content(
    "Long background info...",
    "background"
)
reference = create_cached_content(
    "Technical reference...",
    "reference"
)

# Use specific caches as needed
chat(background)
chat("Explain using the background")
chat(reference)
chat("Now explain using the reference")
</code>
</pattern>
</patterns>
</patterns> | |
<integration_points> | |
<title>Feature Integration</title> | |
<explanation> | |
Caching integrates with other Claudette features: | |
1. Image Integration: | |
- Cache image tokens | |
- Reuse image analysis | |
- Optimize multi-image interactions | |
2. Tool Integration: | |
- Cache expensive tool results | |
- Preserve tool context | |
- Optimize repeated operations | |
3. Structured Output: | |
- Cache complex schemas | |
- Reuse parsed structures | |
- Maintain type safety | |
</explanation> | |
<code language="python"> | |
# Image caching | |
image_msg = mk_msg([image_bytes, "Analyze this"], cache=True) | |
# Tool result caching | |
tool_msg = mk_msg( | |
"Run complex calculation", cache=True, tool_context=True # Cache tool results | |
) | |
# Structure caching | |
schema_msg = mk_msg( | |
"Generate weather report", cache=True, schema=WeatherReport | |
) | |
</code> | |
</integration_points> | |
<error_handling>
<title>Cache Error Handling</title>
<code language="python">
try:
    response = chat(cached_msg, cache=True, cache_key="unique_id")
except CacheCreationError as e:
    print(f"Failed to create cache: {e}")
    print(f"Token count: {e.token_count}")
except CacheReadError as e:
    print(f"Failed to read cache: {e}")
    print(f"Cache key: {e.cache_key}")
except TokenLimitError as e:
    print(f"Combined tokens exceed limit: {e.usage}")
</code>
</error_handling>
<best_practices>
<list>
- Cache long or frequently used content
- Use cache_key for better organization
- Monitor token usage patterns
- Cache expensive tool operations
- Consider cache lifetime
- Handle cache errors appropriately
</list>
</best_practices>
<technical_notes>
<list>
- Caches persist across chat sessions (see the sketch below)
- Token counts include cache overhead
- Cache keys must be unique
- Tool results can be cached
- Images benefit significantly from caching
</list>
</technical_notes>
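<example>
A sketch of cache persistence across sessions, reusing the mk_msg cache_key shown in the Selective Caching pattern (the exact persistence semantics follow the notes above; treat them as an assumption for your version):
<code language="python">
reference = mk_msg("Technical reference...", cache=True, cache_key="reference")

chat_a = Chat(model)
chat_a(reference)    # First session pays the cache-creation cost

chat_b = Chat(model) # A fresh session using the same cache_key
chat_b(reference)    # reads the existing cache instead of recreating it
</code>
</example>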
</section>
<section id="image_handling">
<title>Image Processing System</title>
<core_concepts>
<mermaid>
graph TD
A[Image Input] --> B[Processing]
A --> C[Caching]
B --> D[Single Image]
B --> E[Multiple Images]
D --> F[Analysis]
E --> F
F --> G[Tool Use]
F --> H[Chat Context]
C --> I[Token Optimization]
I --> H
style A fill:#e1f5fe
style B fill:#e8f5e9
style C fill:#e8f5e9
style F fill:#fff3e0
style H fill:#f3e5f5
</mermaid>
<explanation>
Claudette's image system provides:
1. Input Management:
- Single image handling
- Multiple image support
- Automatic format handling
2. Integration Features:
- Natural conversation flow
- Tool interaction
- Context preservation
3. Performance Optimization:
- Automatic token management
- Smart caching
- Memory efficiency
</explanation>
</core_concepts>
<patterns>
<pattern id="basic_image">
<title>Single Image Pattern</title>
<explanation>
Basic image analysis with conversation context
</explanation>
<code language="python">
from pathlib import Path

# Load image
image_bytes = Path('samples/puppy.jpg').read_bytes()
chat = Chat(model)

# Single image analysis
response = chat([
    image_bytes,
    "What do you see in this image?"
])
print(response)

# Follow-up questions maintain context
response = chat("What colors are present?")
print(response)

# Check token usage
print(f"Usage: {response.usage}")
</code>
<output>
I see an adorable Cavalier King Charles Spaniel puppy lying in grass with purple flowers in the background.
The main colors in the image are brown and white from the puppy's fur, green from the grass, and purple from the flowers in the background.
Usage: In: 110; Out: 37; Total: 147
</output>
</pattern>
<pattern id="multi_image"> | |
<title>Multiple Image Pattern</title> | |
<explanation> | |
Handle multiple images in single interaction | |
</explanation> | |
<code language="python"> | |
# Load multiple images | |
image1 = Path('samples/image1.jpg').read_bytes() | |
image2 = Path('samples/image2.jpg').read_bytes() | |
# Compare images | |
response = chat([ | |
image1, | |
image2, | |
"Compare these two images" | |
]) | |
# Maintain context for both images | |
response = chat( | |
"Which image has brighter colors?" | |
) | |
</code> | |
</pattern> | |
<pattern id="image_with_tools"> | |
<title>Image Tool Integration</title> | |
<explanation> | |
Combine image analysis with tool usage | |
</explanation> | |
<code language="python"> | |
def analyze_colors( | |
colors: list[str] # List of colors to analyze | |
) -> dict: | |
"""Analyzes color frequencies in image.""" | |
return {"frequencies": {c: 0.5 for c in colors}} | |
chat = Chat( | |
model, tools=[analyze_colors] | |
) | |
# Image analysis with tool support | |
response = chat.toolloop([ | |
image_bytes, | |
"What are the main colors and their frequencies?" | |
]) | |
</code> | |
</pattern> | |
<pattern id="cached_images"> | |
<title>Image Caching Pattern</title> | |
<explanation> | |
Optimize token usage for repeated image analysis | |
</explanation> | |
<code language="python"> | |
# Create cached image message | |
cached_img = mk_msg( | |
[image_bytes, "Analyze this image"], cache=True | |
) | |
# First use creates cache | |
response = chat(cached_img) | |
print(f"Initial usage: {response.usage}") | |
# Subsequent uses read from cache | |
response = chat( | |
"Describe the image again" | |
) | |
print(f"Cached usage: {response.usage}") | |
</code> | |
<output> | |
Initial usage: In: 110; Out: 37; Cache create: 1000; Total: 1147 | |
Cached usage: In: 25; Out: 42; Cache read: 1000; Total: 1067 | |
</output> | |
</pattern> | |
</patterns> | |
<integration_points> | |
<title>Feature Integration</title> | |
<explanation> | |
Images integrate with all major features: | |
1. Tool Integration: | |
- Image analysis tools | |
- Result processing | |
- Multi-step analysis | |
2. Caching System: | |
- Token optimization | |
- Context preservation | |
- Memory management | |
3. Structured Output: | |
- Image metadata | |
- Analysis results | |
- Tool outputs | |
</explanation> | |
<code language="python"> | |
# Tool integration with caching | |
def image_analyzer(data: bytes) -> dict: | |
"""Analyzes image content.""" | |
return {"objects": ["puppy", "flowers"]} | |
chat = Chat( | |
model, tools=[image_analyzer] | |
) | |
# Cached tool analysis | |
response = chat.toolloop( | |
[image_bytes, "Analyze this image"], cache=True | |
) | |
# Structured output | |
from pydantic import BaseModel | |
class ImageAnalysis(BaseModel): | |
objects: list[str] | |
colors: list[str] | |
result = chat.struct( | |
ImageAnalysis, | |
[image_bytes, "Analyze content"] | |
) | |
</code> | |
</integration_points> | |
<error_handling>
<title>Image Error Handling</title>
<code language="python">
try:
    response = chat([
        image_bytes,
        "Analyze this image"
    ])
except ImageFormatError as e:
    print(f"Invalid image format: {e.format}")
except ImageSizeError as e:
    print(f"Image too large: {e.size} bytes")
except TokenLimitError as e:
    print(f"Image tokens exceed limit: {e.usage}")
</code>
</error_handling>
<best_practices>
<list>
- Cache images for repeated analysis
- Use tools for specific analysis tasks
- Monitor token usage with multiple images
- Handle errors appropriately
- Maintain conversation context
- Consider structured output for analysis
</list>
</best_practices>
<technical_notes>
<list>
- Images consume tokens based on size/complexity (see the resizing sketch below)
- Context is maintained across queries
- Tools can process image data directly
- Caching significantly reduces token usage
- Multiple images increase token consumption
</list>
</technical_notes>
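<example>
Since images consume tokens based on size, downscaling before sending can cut costs. A sketch using Pillow, which is not part of Claudette (the shrink helper and the 1024-pixel threshold are illustrative assumptions):
<code language="python">
from io import BytesIO
from PIL import Image

def shrink(image_bytes: bytes, max_side: int = 1024) -> bytes:
    """Downscales an image so its longest side is at most max_side."""
    img = Image.open(BytesIO(image_bytes))
    img.thumbnail((max_side, max_side))  # In-place; preserves aspect ratio
    buf = BytesIO()
    img.save(buf, format='JPEG')
    return buf.getvalue()

response = chat([shrink(image_bytes), "What do you see in this image?"])
</code>
</example>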
</section>
<system_patterns>
<title>Core Integration Patterns</title>
<mermaid>
graph TD
A[Chat] --> B{Feature Combinations}
B --> C[Tools + Cache]
B --> D[Images + Structure]
B --> E[All Features]
C --> F[Efficient Operations]
D --> G[Rich Analysis]
E --> H[Complex Workflows]
style A fill:#e1f5fe
style B fill:#e8f5e9
style F fill:#fff3e0
style G fill:#fff3e0
style H fill:#fff3e0
</mermaid>
<key_combinations>
<pattern id="efficient_tools">
<code language="python">
# Efficient repeated tool usage
def complex_analysis(data: dict) -> dict:
    """Heavy computation."""
    return processed_result  # Placeholder for the real computed output

chat = Chat(model, tools=[complex_analysis])

# Cache expensive tool results
result = chat.toolloop("Analyze data", cache=True)
</code>
</pattern>
<pattern id="rich_analysis">
<code language="python">
# Image analysis with structured output
result = chat.toolloop([
    image_bytes,
    "Analyze image"
], return_structured=True)

# Access results safely
objects = result['objects']
confidence = result['confidence']
</code>
</pattern>
<pattern id="complete_workflow">
<code language="python">
# Combining all features
def analyze_image(img: bytes) -> dict:
    """Image analysis."""
    return analysis_result  # Placeholder for the real analysis output

chat = Chat(model, tools=[analyze_image])

# Complex workflow
cached_msg = mk_msg([image_bytes, "Full analysis"], cache=True)
result = chat.toolloop(cached_msg, return_structured=True)
</code>
</pattern>
</key_combinations>
</system_patterns>
</docs>