@BexTuychiev · Created November 10, 2025 12:53
"""Multi-Model Comparison Chat: Kimi K2 Thinking, GPT-5, and Claude Sonnet 4.5 side-by-side"""
import os
import time
from typing import Dict, List
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
import streamlit as st
from openai import OpenAI
from anthropic import Anthropic
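
# The clients below read their keys from the environment; load_dotenv() lets a
# local .env file supply OPENROUTER_API_KEY, OPENAI_API_KEY, and ANTHROPIC_API_KEY.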
load_dotenv()
MODEL_CONFIGS = {
"Kimi K2 Thinking": {"icon": "πŸŒ™"},
"GPT-5": {"icon": "🧠"},
"Claude Sonnet 4.5": {"icon": "🎯"}
}
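
# Kimi K2 is reached through OpenRouter's OpenAI-compatible endpoint;
# GPT-5 and Claude Sonnet 4.5 use their vendors' native SDKs.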
KIMI_CLIENT = OpenAI(
    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1"
)
GPT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
CLAUDE_CLIENT = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
def error_response(error_msg: str) -> Dict:
"""Return standardized error response."""
return {
"content": None, "reasoning_content": None, "response_time": 0,
"tokens_used": {"input": 0, "output": 0, "total": 0}, "error": error_msg
}
def call_model(model_name: str, messages: List[Dict], **kwargs) -> Dict:
"""Call any model with unified interface."""
try:
start_time = time.time()
if model_name == "Kimi K2 Thinking":
enable_thinking = kwargs.get("enable_thinking", True)
# Kimi K2 uses standard OpenAI-compatible API via OpenRouter
completion = KIMI_CLIENT.chat.completions.create(
model="moonshotai/kimi-k2-thinking",
messages=messages,
temperature=1.0, # Recommended for reasoning models
extra_body={"include_reasoning": True}
)
message = completion.choices[0].message
content = message.content
# Extract reasoning from the dedicated 'reasoning' field
reasoning_content = None
if enable_thinking and hasattr(message, 'reasoning') and message.reasoning:
reasoning_content = message.reasoning
return {
"content": content,
"reasoning_content": reasoning_content,
"response_time": time.time() - start_time,
"tokens_used": {
"input": completion.usage.prompt_tokens,
"output": completion.usage.completion_tokens,
"total": completion.usage.total_tokens
},
"error": None
}
elif model_name == "GPT-5":
reasoning_effort = kwargs.get("reasoning_effort", "medium")
input_messages = [{"role": m["role"], "content": m["content"]} for m in messages]
response = GPT_CLIENT.responses.create(
model="gpt-5", input=input_messages,
reasoning={"effort": reasoning_effort, "summary": "auto"}
)
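            # The Responses API returns a list of typed output items; reasoning
            # summaries and the final message arrive as separate item types.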
            reasoning_text, content_text = None, ""
            for item in response.output:
                if item.type == "reasoning" and hasattr(item, "summary"):
                    summaries = [s.text for s in item.summary if hasattr(s, "text")]
                    reasoning_text = "\n\n".join(summaries) if summaries else None
                elif item.type == "message" and hasattr(item, "content"):
                    content_text += "".join(c.text for c in item.content if hasattr(c, "text"))
            return {
                "content": content_text,
                "reasoning_content": reasoning_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": getattr(response.usage, "input_tokens", 0),
                    "output": getattr(response.usage, "output_tokens", 0),
                    "total": getattr(response.usage, "total_tokens", 0)
                },
                "error": None
            }
        else:  # Claude Sonnet 4.5
            enable_thinking = kwargs.get("enable_thinking", True)
            params = {"model": "claude-sonnet-4-5", "max_tokens": 10000, "messages": messages}
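            # Anthropic requires budget_tokens to stay below max_tokens when extended
            # thinking is enabled; here up to 5,000 of the 10,000 output tokens may be thinking.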
            if enable_thinking:
                params["thinking"] = {"type": "enabled", "budget_tokens": 5000}
            message = CLAUDE_CLIENT.messages.create(**params)
            content_text, thinking_text = "", None
            for block in message.content:
                if block.type == "thinking":
                    thinking_text = block.thinking
                elif block.type == "text":
                    content_text += block.text
            return {
                "content": content_text,
                "reasoning_content": thinking_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": message.usage.input_tokens,
                    "output": message.usage.output_tokens,
                    "total": message.usage.input_tokens + message.usage.output_tokens
                },
                "error": None
            }
    except Exception as e:
        return error_response(f"{model_name} Error: {str(e)}")

def render_response_card(model_name: str, response_data: Dict):
"""Render model response with thinking process."""
st.markdown(f"### {MODEL_CONFIGS[model_name]['icon']} {model_name}")
if response_data["error"]:
return st.error(response_data["error"])
if response_data["reasoning_content"]:
with st.expander("🧠 Thinking Process", expanded=False):
st.markdown(response_data["reasoning_content"])
st.markdown(response_data["content"])
def call_models_parallel(messages: List[Dict], selected_models: List[str],
                         kimi_thinking: bool, gpt5_reasoning: str, claude_thinking: bool) -> Dict[str, Dict]:
    """Call multiple models in parallel."""
    model_kwargs = {
        "Kimi K2 Thinking": {"enable_thinking": kimi_thinking},
        "GPT-5": {"reasoning_effort": gpt5_reasoning},
        "Claude Sonnet 4.5": {"enable_thinking": claude_thinking}
    }
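
    # One worker per model lets all three API calls run concurrently;
    # as_completed yields each future as soon as its call returns.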
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(call_model, m, messages, **model_kwargs[m]): m
            for m in selected_models if m in model_kwargs
        }
        return {futures[f]: f.result() for f in as_completed(futures)}

def main():
"""Main Streamlit application."""
st.set_page_config(
page_title="Multi-Model Comparison Chat", page_icon="πŸ€–",
layout="wide", initial_sidebar_state="expanded"
)
st.session_state.setdefault("messages", [])
st.session_state.setdefault("model_responses", [])
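    # Streamlit reruns the whole script on every interaction, so chat history
    # and per-turn model responses are kept in session_state.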
    st.title("πŸ€– Multi-Model Comparison Chat")
    st.markdown("""
    Compare **Kimi K2 Thinking**, **GPT-5**, and **Claude Sonnet 4.5** side-by-side.
    All three models support reasoning/thinking modes; see how they approach problems differently.
    """)
    # Always use all three models
    selected_models = ["Kimi K2 Thinking", "GPT-5", "Claude Sonnet 4.5"]
    with st.sidebar:
        st.header("βš™οΈ Settings")
        st.subheader("Thinking Mode")
        kimi_thinking = st.checkbox("Enable Kimi K2 Thinking", value=True)
        gpt5_reasoning = st.selectbox(
            "GPT-5 Reasoning Effort", options=["minimal", "low", "medium", "high"],
            index=2, help="Higher effort = better quality but slower and more expensive"
        )
        claude_thinking = st.checkbox("Enable Claude Thinking", value=True)
        st.divider()
        st.subheader("API Status")
        st.markdown(f"""
        - Kimi K2: {"βœ…" if os.getenv("OPENROUTER_API_KEY") else "❌"}
        - GPT-5: {"βœ…" if os.getenv("OPENAI_API_KEY") else "❌"}
        - Claude: {"βœ…" if os.getenv("ANTHROPIC_API_KEY") else "❌"}
        """)
        st.divider()
        if st.button("πŸ—‘οΈ Clear Conversation", use_container_width=True):
            st.session_state.messages = []
            st.session_state.model_responses = []
            st.rerun()
    if st.session_state.messages:
        for i, msg in enumerate(st.session_state.messages):
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
            if msg["role"] == "user" and i < len(st.session_state.model_responses):
                responses = st.session_state.model_responses[i]
                if responses:
                    cols = st.columns(len(responses))
                    for idx, (model_name, response_data) in enumerate(responses.items()):
                        with cols[idx]:
                            render_response_card(model_name, response_data)
    if prompt := st.chat_input("Ask a question to all models..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        api_messages = [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]
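        # Replay the full history so every model answers with the same context.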
        with st.spinner("πŸ€” Models are thinking..."):
            responses = call_models_parallel(
                api_messages, selected_models, kimi_thinking, gpt5_reasoning, claude_thinking
            )
        st.session_state.model_responses.append(responses)
        cols = st.columns(len(responses))
        for idx, (model_name, response_data) in enumerate(responses.items()):
            with cols[idx]:
                render_response_card(model_name, response_data)

if __name__ == "__main__":
    main()
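
# To try it locally (assuming this file is saved as app.py):
#   streamlit run app.py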