| """Multi-Model Comparison Chat: Kimi K2 Thinking, GPT-5, and Claude Sonnet 4.5 side-by-side""" | |
| import os | |
| import time | |
| from typing import Dict, List, Optional | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from dotenv import load_dotenv | |
| import streamlit as st | |
| from openai import OpenAI | |
| from anthropic import Anthropic | |
| load_dotenv() | |

MODEL_CONFIGS = {
    "Kimi K2 Thinking": {"icon": "🌙"},
    "GPT-5": {"icon": "🧠"},
    "Claude Sonnet 4.5": {"icon": "🎯"}
}
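
# All three clients read their keys from the environment (loaded above via .env):
# OPENROUTER_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY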
KIMI_CLIENT = OpenAI(
    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1"
)
GPT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
CLAUDE_CLIENT = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))


def error_response(error_msg: str) -> Dict:
    """Return standardized error response."""
    return {
        "content": None, "reasoning_content": None, "response_time": 0,
        "tokens_used": {"input": 0, "output": 0, "total": 0}, "error": error_msg
    }


def call_model(model_name: str, messages: List[Dict], **kwargs) -> Dict:
    """Call any model with unified interface."""
    try:
        start_time = time.time()
        if model_name == "Kimi K2 Thinking":
            enable_thinking = kwargs.get("enable_thinking", True)
            # Kimi K2 uses standard OpenAI-compatible API via OpenRouter
            completion = KIMI_CLIENT.chat.completions.create(
                model="moonshotai/kimi-k2-thinking",
                messages=messages,
                temperature=1.0,  # Recommended for reasoning models
                extra_body={"include_reasoning": True}
            )
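            # Note: reasoning is always requested above; enable_thinking only
            # controls whether it is surfaced in the UI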
            message = completion.choices[0].message
            content = message.content
            # Extract reasoning from the dedicated 'reasoning' field
            reasoning_content = None
            if enable_thinking and hasattr(message, 'reasoning') and message.reasoning:
                reasoning_content = message.reasoning
            return {
                "content": content,
                "reasoning_content": reasoning_content,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": completion.usage.prompt_tokens,
                    "output": completion.usage.completion_tokens,
                    "total": completion.usage.total_tokens
                },
                "error": None
            }
        elif model_name == "GPT-5":
            reasoning_effort = kwargs.get("reasoning_effort", "medium")
            input_messages = [{"role": m["role"], "content": m["content"]} for m in messages]
            response = GPT_CLIENT.responses.create(
                model="gpt-5", input=input_messages,
                reasoning={"effort": reasoning_effort, "summary": "auto"}
            )
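            # The Responses API returns a list of typed output items:
            # "reasoning" items carry summary parts, "message" items carry the text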
            reasoning_text, content_text = None, ""
            for item in response.output:
                if item.type == "reasoning" and hasattr(item, "summary"):
                    summaries = [s.text for s in item.summary if hasattr(s, "text")]
                    reasoning_text = "\n\n".join(summaries) if summaries else None
                elif item.type == "message" and hasattr(item, "content"):
                    content_text += "".join(c.text for c in item.content if hasattr(c, "text"))
            return {
                "content": content_text,
                "reasoning_content": reasoning_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": getattr(response.usage, "input_tokens", 0),
                    "output": getattr(response.usage, "output_tokens", 0),
                    "total": getattr(response.usage, "total_tokens", 0)
                },
                "error": None
            }
        else:  # Claude Sonnet 4.5
            enable_thinking = kwargs.get("enable_thinking", True)
            params = {"model": "claude-sonnet-4-5", "max_tokens": 10000, "messages": messages}
            if enable_thinking:
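                # Anthropic requires budget_tokens >= 1024 and less than max_tokens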
| params["thinking"] = {"type": "enabled", "budget_tokens": 5000} | |
| message = CLAUDE_CLIENT.messages.create(**params) | |
| content_text, thinking_text = "", None | |
| for block in message.content: | |
| if block.type == "thinking": | |
| thinking_text = block.thinking | |
| elif block.type == "text": | |
| content_text += block.text | |
| return { | |
| "content": content_text, | |
| "reasoning_content": thinking_text, | |
| "response_time": time.time() - start_time, | |
| "tokens_used": { | |
| "input": message.usage.input_tokens, | |
| "output": message.usage.output_tokens, | |
| "total": message.usage.input_tokens + message.usage.output_tokens | |
| }, | |
| "error": None | |
| } | |
| except Exception as e: | |
| return error_response(f"{model_name} Error: {str(e)}") | |


def render_response_card(model_name: str, response_data: Dict):
    """Render model response with thinking process."""
    st.markdown(f"### {MODEL_CONFIGS[model_name]['icon']} {model_name}")
    if response_data["error"]:
        st.error(response_data["error"])
        return
    if response_data["reasoning_content"]:
        with st.expander("🧠 Thinking Process", expanded=False):
            st.markdown(response_data["reasoning_content"])
    st.markdown(response_data["content"])


def call_models_parallel(messages: List[Dict], selected_models: List[str],
                         kimi_thinking: bool, gpt5_reasoning: str, claude_thinking: bool) -> Dict[str, Dict]:
    """Call multiple models in parallel."""
    model_kwargs = {
        "Kimi K2 Thinking": {"enable_thinking": kimi_thinking},
        "GPT-5": {"reasoning_effort": gpt5_reasoning},
        "Claude Sonnet 4.5": {"enable_thinking": claude_thinking}
    }
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(call_model, m, messages, **model_kwargs[m]): m
            for m in selected_models if m in model_kwargs
        }
        results = {futures[f]: f.result() for f in as_completed(futures)}
    # as_completed yields in completion order; re-key by selected_models so the
    # comparison columns always render in the same order
    return {m: results[m] for m in selected_models if m in results}


def main():
    """Main Streamlit application."""
    st.set_page_config(
        page_title="Multi-Model Comparison Chat", page_icon="🤖",
        layout="wide", initial_sidebar_state="expanded"
    )
    st.session_state.setdefault("messages", [])
    st.session_state.setdefault("model_responses", [])
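    # Only user turns are stored in `messages`; model_responses[i] holds the
    # per-model replies to messages[i]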
    st.title("🤖 Multi-Model Comparison Chat")
    st.markdown("""
    Compare **Kimi K2 Thinking**, **GPT-5**, and **Claude Sonnet 4.5** side-by-side.
    All three models support reasoning/thinking modes; see how they approach problems differently.
    """)
    # Always use all three models
    selected_models = ["Kimi K2 Thinking", "GPT-5", "Claude Sonnet 4.5"]
    with st.sidebar:
        st.header("⚙️ Settings")
        st.subheader("Thinking Mode")
        kimi_thinking = st.checkbox("Enable Kimi K2 Thinking", value=True)
        gpt5_reasoning = st.selectbox(
            "GPT-5 Reasoning Effort", options=["minimal", "low", "medium", "high"],
            index=2, help="Higher effort = better quality but slower and more expensive"
        )
        claude_thinking = st.checkbox("Enable Claude Thinking", value=True)
        st.divider()
        st.subheader("API Status")
        st.markdown(f"""
        - Kimi K2: {"✅" if os.getenv("OPENROUTER_API_KEY") else "❌"}
        - GPT-5: {"✅" if os.getenv("OPENAI_API_KEY") else "❌"}
        - Claude: {"✅" if os.getenv("ANTHROPIC_API_KEY") else "❌"}
        """)
        st.divider()
        if st.button("🗑️ Clear Conversation", use_container_width=True):
            st.session_state.messages = []
            st.session_state.model_responses = []
            st.rerun()
    if st.session_state.messages:
        for i, msg in enumerate(st.session_state.messages):
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
            if msg["role"] == "user" and i < len(st.session_state.model_responses):
                responses = st.session_state.model_responses[i]
                if responses:
                    cols = st.columns(len(responses))
                    for idx, (model_name, response_data) in enumerate(responses.items()):
                        with cols[idx]:
                            render_response_card(model_name, response_data)
    if prompt := st.chat_input("Ask a question to all models..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        api_messages = [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]
        with st.spinner("🤖 Models are thinking..."):
            responses = call_models_parallel(
                api_messages, selected_models, kimi_thinking, gpt5_reasoning, claude_thinking
            )
        st.session_state.model_responses.append(responses)
        cols = st.columns(len(responses))
        for idx, (model_name, response_data) in enumerate(responses.items()):
            with cols[idx]:
                render_response_card(model_name, response_data)


if __name__ == "__main__":
    main()
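
To run this locally, save the file as, say, app.py, install streamlit, openai, anthropic, and python-dotenv, add the three API keys to a .env file next to it, and launch with: streamlit run app.py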