Raw data: glm-4.6-results.tar.gz
| Metric | Kimi-K2-Instruct | GLM-4.6-FP8 |
|---|---|---|
| Overall Accuracy | 45.62% | 60.13% |
| Latency Mean | 3.32 s | 6.66 s |
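
Latency figures like the means above can be reproduced with a simple timing probe against the OpenAI-compatible endpoint. A minimal sketch (the model id is an assumption, not the exact id used in the benchmark):

```python
import os
import time
import openai

client = openai.Client(base_url="https://llm.chutes.ai/v1", api_key=os.getenv("CHUTES_API_KEY"))

# Time a single chat completion round-trip.
start = time.time()
client.chat.completions.create(
    model="zai-org/GLM-4.6-FP8",  # hypothetical model id
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
)
print(f"latency: {time.time() - start:.2f}s")
```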
Extracting PDF layout with a vision model:

```python
import os
import base64
import openai

# Assumes CHUTES_API_KEY is set in the environment.
client = openai.Client(base_url="https://llm.chutes.ai/v1", api_key=os.getenv("CHUTES_API_KEY"))

prompt = """Please output the layout information from the PDF image, including each layout element's bbox, its category, and the corresponding text content within the bbox."""
```
You can call this endpoint, and it will automatically select the most recent vllm image:

```bash
curl -XPOST https://api.chutes.ai/chutes/vllm \
  -H 'content-type: application/json' \
  -H 'Authorization: cpk...' \
  -d '{
    "tagline": "Mistral 24b Instruct",
    "model": "unsloth/Mistral-Small-24B-Instruct-2501",
    "public": true
  }'
```
Base64-encode a batch of local images:

```python
import os
import base64
import glob
import openai

client = openai.Client(base_url="https://llm.chutes.ai/v1", api_key=os.environ["CHUTES_API_KEY"])

# Collect up to eight logo images as base64 strings.
image_base64s = []
for path in glob.glob("/home/jdurbin/Downloads/logo*.png")[:8]:
    with open(path, "rb") as infile:
        image_base64s.append(base64.b64encode(infile.read()).decode())
```
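
With the images encoded, they can all be attached to a single multimodal message. A minimal sketch, assuming the standard OpenAI vision message format (the model id is a placeholder):

```python
# One user message carrying every encoded image plus a text question.
content = [{"type": "text", "text": "Which of these logos look most similar?"}]
for b64 in image_base64s:
    content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}})

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-32B-Instruct",  # hypothetical model id
    messages=[{"role": "user", "content": content}],
)
print(response.choices[0].message.content)
```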
Voice cloning with Spark-TTS:

```python
import os
import base64
import requests

# Reference audio that conditions the generated voice.
audio = base64.b64encode(open("test.wav", "rb").read()).decode()
result = requests.post(
    "https://chutes-spark-tts.chutes.ai/speak",
    # Auth header is an assumption, mirroring the curl example above.
    headers={"Authorization": os.environ["CHUTES_API_KEY"]},
    json={
        "text": "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
        "sample_audio_b64": audio,
    },
)
open("spark_output.wav", "wb").write(result.content)  # assumes raw audio bytes
```
Conversational TTS with CSM-1B:

```python
import os
import base64
import requests

audio = base64.b64encode(open("test.wav", "rb").read()).decode()
result = requests.post(
    "https://chutes-csm-1b.chutes.ai/speak",
    headers={"Authorization": os.environ["CHUTES_API_KEY"]},  # assumed auth
    json={
        "speaker": 1,
        "context": [],  # prior conversation turns go here; empty for a cold start
        "text": "How much wood would a woodchuck chuck?",  # assumed field
    },
)
open("csm_output.wav", "wb").write(result.content)  # assumes raw audio bytes
```
A chat completion response looks like this (assistant content elided):

```json
{
  "id": "27ab0d1289814bb28c7c30e38a98df8d",
  "object": "chat.completion",
  "created": 1742109451,
  "model": "cognitivecomputations/Dolphin3.0-Mistral-24B",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "..."
      }
    }
  ]
}
```
Install chutes (and bittensor, if you don't already have a coldkey/hotkey):

```bash
python3 -m venv chutes-venv
source chutes-venv/bin/activate
pip install chutes 'bittensor<8'
```
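
With the environment ready, the next step is creating a chutes account tied to your hotkey; a minimal sketch, assuming the CLI exposes a register entrypoint:

```bash
# Command name is an assumption about the chutes CLI.
chutes register
```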