@BexTuychiev · Created November 10, 2025 12:53
"""Multi-Model Comparison Chat: Kimi K2 Thinking, GPT-5, and Claude Sonnet 4.5 side-by-side"""
import os
import time
from typing import Dict, List
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
import streamlit as st
from openai import OpenAI
from anthropic import Anthropic
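
# The clients below read their keys from the environment; load_dotenv() lets a
# local .env file supply OPENROUTER_API_KEY, OPENAI_API_KEY, and ANTHROPIC_API_KEY.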
load_dotenv()
MODEL_CONFIGS = {
"Kimi K2 Thinking": {"icon": "πŸŒ™"},
"GPT-5": {"icon": "🧠"},
"Claude Sonnet 4.5": {"icon": "🎯"}
}
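
# Kimi K2 is reached through OpenRouter's OpenAI-compatible endpoint;
# GPT-5 and Claude Sonnet 4.5 use their vendors' native SDKs.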
KIMI_CLIENT = OpenAI(
    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1"
)
GPT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
CLAUDE_CLIENT = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
def error_response(error_msg: str) -> Dict:
"""Return standardized error response."""
return {
"content": None, "reasoning_content": None, "response_time": 0,
"tokens_used": {"input": 0, "output": 0, "total": 0}, "error": error_msg
}
def call_model(model_name: str, messages: List[Dict], **kwargs) -> Dict:
"""Call any model with unified interface."""
try:
start_time = time.time()
if model_name == "Kimi K2 Thinking":
enable_thinking = kwargs.get("enable_thinking", True)
# Kimi K2 uses standard OpenAI-compatible API via OpenRouter
completion = KIMI_CLIENT.chat.completions.create(
model="moonshotai/kimi-k2-thinking",
messages=messages,
temperature=1.0, # Recommended for reasoning models
extra_body={"include_reasoning": True}
)
message = completion.choices[0].message
content = message.content
# Extract reasoning from the dedicated 'reasoning' field
reasoning_content = None
if enable_thinking and hasattr(message, 'reasoning') and message.reasoning:
reasoning_content = message.reasoning
return {
"content": content,
"reasoning_content": reasoning_content,
"response_time": time.time() - start_time,
"tokens_used": {
"input": completion.usage.prompt_tokens,
"output": completion.usage.completion_tokens,
"total": completion.usage.total_tokens
},
"error": None
}
elif model_name == "GPT-5":
reasoning_effort = kwargs.get("reasoning_effort", "medium")
input_messages = [{"role": m["role"], "content": m["content"]} for m in messages]
response = GPT_CLIENT.responses.create(
model="gpt-5", input=input_messages,
reasoning={"effort": reasoning_effort, "summary": "auto"}
)
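            # The Responses API returns a list of typed output items; reasoning
            # summaries and the final message arrive as separate item types.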
            reasoning_text, content_text = None, ""
            for item in response.output:
                if item.type == "reasoning" and hasattr(item, "summary"):
                    summaries = [s.text for s in item.summary if hasattr(s, "text")]
                    reasoning_text = "\n\n".join(summaries) if summaries else None
                elif item.type == "message" and hasattr(item, "content"):
                    content_text += "".join(c.text for c in item.content if hasattr(c, "text"))
            return {
                "content": content_text,
                "reasoning_content": reasoning_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": getattr(response.usage, "input_tokens", 0),
                    "output": getattr(response.usage, "output_tokens", 0),
                    "total": getattr(response.usage, "total_tokens", 0)
                },
                "error": None
            }
        else:  # Claude Sonnet 4.5
            enable_thinking = kwargs.get("enable_thinking", True)
            params = {"model": "claude-sonnet-4-5", "max_tokens": 10000, "messages": messages}
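            # Anthropic requires budget_tokens to stay below max_tokens when extended
            # thinking is enabled; here up to 5,000 of the 10,000 output tokens may be thinking.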
            if enable_thinking:
                params["thinking"] = {"type": "enabled", "budget_tokens": 5000}
            message = CLAUDE_CLIENT.messages.create(**params)
            content_text, thinking_text = "", None
            for block in message.content:
                if block.type == "thinking":
                    thinking_text = block.thinking
                elif block.type == "text":
                    content_text += block.text
            return {
                "content": content_text,
                "reasoning_content": thinking_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": message.usage.input_tokens,
                    "output": message.usage.output_tokens,
                    "total": message.usage.input_tokens + message.usage.output_tokens
                },
                "error": None
            }
    except Exception as e:
        return error_response(f"{model_name} Error: {str(e)}")

def render_response_card(model_name: str, response_data: Dict):
"""Render model response with thinking process."""
st.markdown(f"### {MODEL_CONFIGS[model_name]['icon']} {model_name}")
if response_data["error"]:
return st.error(response_data["error"])
if response_data["reasoning_content"]:
with st.expander("🧠 Thinking Process", expanded=False):
st.markdown(response_data["reasoning_content"])
st.markdown(response_data["content"])
def call_models_parallel(messages: List[Dict], selected_models: List[str],
                         kimi_thinking: bool, gpt5_reasoning: str, claude_thinking: bool) -> Dict[str, Dict]:
    """Call multiple models in parallel."""
    model_kwargs = {
        "Kimi K2 Thinking": {"enable_thinking": kimi_thinking},
        "GPT-5": {"reasoning_effort": gpt5_reasoning},
        "Claude Sonnet 4.5": {"enable_thinking": claude_thinking}
    }
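
    # One worker per model lets all three API calls run concurrently;
    # as_completed yields each future as soon as its call returns.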
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(call_model, m, messages, **model_kwargs[m]): m
            for m in selected_models if m in model_kwargs
        }
        return {futures[f]: f.result() for f in as_completed(futures)}

def main():
"""Main Streamlit application."""
st.set_page_config(
page_title="Multi-Model Comparison Chat", page_icon="πŸ€–",
layout="wide", initial_sidebar_state="expanded"
)
st.session_state.setdefault("messages", [])
st.session_state.setdefault("model_responses", [])
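    # Streamlit reruns the whole script on every interaction, so chat history
    # and per-turn model responses are kept in session_state.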
    st.title("πŸ€– Multi-Model Comparison Chat")
    st.markdown("""
    Compare **Kimi K2 Thinking**, **GPT-5**, and **Claude Sonnet 4.5** side-by-side.
    All three models support reasoning/thinking modes; see how they approach problems differently.
    """)
    # Always use all three models
    selected_models = ["Kimi K2 Thinking", "GPT-5", "Claude Sonnet 4.5"]
    with st.sidebar:
        st.header("βš™οΈ Settings")
        st.subheader("Thinking Mode")
        kimi_thinking = st.checkbox("Enable Kimi K2 Thinking", value=True)
        gpt5_reasoning = st.selectbox(
            "GPT-5 Reasoning Effort", options=["minimal", "low", "medium", "high"],
            index=2, help="Higher effort = better quality but slower and more expensive"
        )
        claude_thinking = st.checkbox("Enable Claude Thinking", value=True)
        st.divider()
        st.subheader("API Status")
        st.markdown(f"""
        - Kimi K2: {"βœ…" if os.getenv("OPENROUTER_API_KEY") else "❌"}
        - GPT-5: {"βœ…" if os.getenv("OPENAI_API_KEY") else "❌"}
        - Claude: {"βœ…" if os.getenv("ANTHROPIC_API_KEY") else "❌"}
        """)
        st.divider()
        if st.button("πŸ—‘οΈ Clear Conversation", use_container_width=True):
            st.session_state.messages = []
            st.session_state.model_responses = []
            st.rerun()
    if st.session_state.messages:
        for i, msg in enumerate(st.session_state.messages):
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
            if msg["role"] == "user" and i < len(st.session_state.model_responses):
                responses = st.session_state.model_responses[i]
                if responses:
                    cols = st.columns(len(responses))
                    for idx, (model_name, response_data) in enumerate(responses.items()):
                        with cols[idx]:
                            render_response_card(model_name, response_data)
    if prompt := st.chat_input("Ask a question to all models..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        api_messages = [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]
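        # Replay the full history so every model answers with the same context.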
        with st.spinner("πŸ€” Models are thinking..."):
            responses = call_models_parallel(
                api_messages, selected_models, kimi_thinking, gpt5_reasoning, claude_thinking
            )
        st.session_state.model_responses.append(responses)
        cols = st.columns(len(responses))
        for idx, (model_name, response_data) in enumerate(responses.items()):
            with cols[idx]:
                render_response_card(model_name, response_data)

if __name__ == "__main__":
    main()
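
# To try it locally (assuming this file is saved as app.py):
#   streamlit run app.py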