Skip to content

Instantly share code, notes, and snippets.

@qpwo
Created May 14, 2025 12:03
Show Gist options
  • Save qpwo/093aec150b62104c60b45d61b2394a09 to your computer and use it in GitHub Desktop.
Save qpwo/093aec150b62104c60b45d61b2394a09 to your computer and use it in GitHub Desktop.
openrouter minimal working example of streaming with chosen provider
#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import sys
from typing import Iterable, Literal, Sequence
import requests
from dotenv import load_dotenv
# Pull credentials from a local .env file, then fail fast if the key is absent.
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    sys.exit("Set the OPENROUTER_API_KEY environment variable first.")

# OpenRouter chat-completions endpoint plus the headers attached to every call.
URL = "https://openrouter.ai/api/v1/chat/completions"
DEFAULT_HEADERS = {
    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://example.com",
    "X-Title": "Parasail streaming demo via Python",
}

# Provider slugs accepted by OpenRouter's provider-routing preferences; see
# https://openrouter.ai/docs/features/provider-routing#json-schema-for-provider-preferences
Provider = Literal[
    "AnyScale", "HuggingFace", "Hyperbolic 2", "Lepton", "Lynn 2", "Lynn",
    "Modal", "OctoAI", "Recursal", "Reflection", "Replicate", "SambaNova 2",
    "SF Compute", "Together 2", "01.AI", "AI21", "AionLabs", "Alibaba",
    "Amazon Bedrock", "Anthropic", "Atoma", "Avian", "Azure", "BaseTen",
    "Cent-ML", "Cerebras", "Chutes", "Cloudflare", "Cohere", "Crusoe",
    "DeepInfra", "DeepSeek", "Enfer", "Featherless", "Fireworks", "Friendli",
    "GMICloud", "Google", "Google AI Studio", "Groq", "Hyperbolic",
    "Inception", "InferenceNet", "Infermatic", "Inflection", "InoCloud",
    "Kluster", "Lambda", "Liquid", "Mancer", "Mancer 2", "Meta", "Minimax",
    "Mistral", "NCompass", "Nebius", "NextBit", "Nineteen", "Novita",
    "OpenAI", "OpenInference", "Parasail", "Perplexity", "Phala",
    "SambaNova", "Stealth", "Targon", "Together", "Ubicloud", "Venice",
    "xAI",
]
# https://openrouter.ai/docs/api-reference/streaming
def stream_completion(
    messages: Sequence[dict[str, str]] | str,
    model: str,
    providers: Iterable[Provider] | None = None,
    temperature: float | None = None,
    max_completion_tokens: int | None = None,
) -> Iterable[str]:
    """Stream content deltas from the OpenRouter chat-completions API.

    Parses the SSE response line by line and yields each token fragment
    as it arrives.  https://openrouter.ai/docs/api-reference/streaming

    Args:
        messages: Chat messages, or a bare string treated as one user turn.
        model: OpenRouter model slug, e.g. "deepseek/deepseek-chat-v3-0324".
        providers: If given, restrict routing to exactly these providers
            (no fallbacks).  https://openrouter.ai/docs/features/provider-routing
        temperature: Optional sampling temperature.
        max_completion_tokens: Optional cap, sent as OpenRouter's "max_tokens".

    Yields:
        Non-empty content delta strings.

    Raises:
        requests.exceptions.RequestException: on transport-level failure
            (HTTP error statuses are printed to stderr and swallowed instead).
    """
    if isinstance(messages, str):
        messages = [{"role": "user", "content": messages}]
    payload: dict[str, object] = {"model": model, "messages": messages, "stream": True}
    if providers is not None:
        payload["provider"] = {"only": list(providers), "allow_fallbacks": False}
    if temperature is not None:
        payload["temperature"] = temperature
    if max_completion_tokens is not None:
        payload["max_tokens"] = max_completion_tokens
    try:
        with requests.post(URL, headers=DEFAULT_HEADERS, json=payload, stream=True, timeout=60) as resp:
            try:
                resp.raise_for_status()
            except requests.exceptions.HTTPError:
                # Surface the server's error body for debugging, then give up
                # quietly (deliberate best-effort; callers just see no tokens).
                print(f"HTTP {resp.status_code} error", file=sys.stderr)
                print(resp.text, file=sys.stderr)
                return
            buffer = ""
            for chunk in resp.iter_content(chunk_size=1024, decode_unicode=True):
                # iter_content may yield empty/None keep-alive chunks; skip them.
                if not chunk:
                    continue
                buffer += chunk
                while "\n" in buffer:
                    line, buffer = buffer.split("\n", 1)
                    line = line.strip()
                    # Skip blanks and SSE comment lines (e.g. ": OPENROUTER PROCESSING").
                    if not line.startswith("data: "):
                        continue
                    data = line[6:]
                    if data == "[DONE]":
                        return
                    try:
                        obj = json.loads(data)
                    except json.JSONDecodeError:
                        continue
                    # Fix: some mid-stream payloads (e.g. error objects) carry no
                    # "choices"; the original crashed with KeyError on them.
                    choices = obj.get("choices") or []
                    if not choices:
                        continue
                    delta = (choices[0].get("delta") or {}).get("content")
                    if delta:
                        yield delta
    except requests.exceptions.RequestException as err:
        print("Request failed:", err, file=sys.stderr)
        raise
if __name__ == "__main__":
    # Demo: stream a haiku from DeepSeek, pinned to the Parasail provider.
    prompt = "Write a short haiku about the sea."
    print(f"User:\n{prompt}\n\nAssistant:")
    tokens = stream_completion(
        messages=prompt,
        model="deepseek/deepseek-chat-v3-0324",
        providers=("Parasail",),
        temperature=0.2,
        max_completion_tokens=64,
    )
    for piece in tokens:
        # Emit fragments immediately so the reply renders as it streams.
        sys.stdout.write(piece)
        sys.stdout.flush()
    print("\n\n— done —")
@qpwo
Copy link
Author

qpwo commented May 14, 2025

You can test whether it works by putting a typo in the provider name and seeing if the API returns an error.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment