@ChrisSwanson
Created August 12, 2025 18:55
OpenAI-compatible embedding service
from fastapi import FastAPI, Request, HTTPException
from transformers import AutoTokenizer, AutoModel
import torch
import os
import uvicorn

app = FastAPI()

# Optional shared-secret auth: if API_KEY is unset, the endpoint is open.
API_KEY = os.environ.get("API_KEY")

tokenizer = AutoTokenizer.from_pretrained("nomic-ai/modernbert-embed-base")
model = AutoModel.from_pretrained("nomic-ai/modernbert-embed-base")
model.eval()  # disable dropout so embeddings are deterministic

@app.post("/v1/embeddings")
async def create_embedding(request: Request):
    if API_KEY and request.headers.get("x-api-key") != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API key")
    body = await request.json()
    # Accept either a single string or a list of strings, as the OpenAI API does.
    inputs = body["input"]
    if isinstance(inputs, str):
        inputs = [inputs]
    data = []
    prompt_tokens = 0
    for idx, text in enumerate(inputs):
        tokens = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        prompt_tokens += len(tokens["input_ids"][0])
        with torch.no_grad():
            outputs = model(**tokens)
        # Example: mean-pooling last_hidden_state
        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().tolist()
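        # Note (assumption, not part of the original gist): OpenAI's embedding
        # endpoints return unit-length vectors, so for closer parity you could
        # L2-normalize the pooled output before converting it to a list, e.g.:
        #   pooled = outputs.last_hidden_state.mean(dim=1)
        #   embedding = torch.nn.functional.normalize(pooled, p=2, dim=1).squeeze().cpu().numpy().tolist()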
        data.append({
            "object": "embedding",
            "embedding": embedding,
            "index": idx
        })
    response = {
        "object": "list",
        "data": data,
        # Echo the requested model name, falling back to the one actually loaded.
        "model": body.get("model", "nomic-ai/modernbert-embed-base"),
        "usage": {
            "prompt_tokens": prompt_tokens,
            "total_tokens": prompt_tokens
        }
    }
    return response


if __name__ == "__main__":
    uvicorn.run(app, host="localhost", port=11435)
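
A minimal client-side sketch for exercising the service (not part of the gist). It assumes the server above is running on localhost:11435 with API_KEY=secret exported in its environment, and uses the requests library; the model name and input strings are placeholders.

# Hypothetical test client for the embedding service above.
import requests

resp = requests.post(
    "http://localhost:11435/v1/embeddings",
    headers={"x-api-key": "secret"},  # matches the server's API_KEY env var
    json={
        "model": "nomic-ai/modernbert-embed-base",
        "input": ["hello world", "embeddings are vectors"],
    },
    timeout=60,
)
resp.raise_for_status()
payload = resp.json()
print(len(payload["data"]), "embeddings of dimension", len(payload["data"][0]["embedding"]))
print("usage:", payload["usage"])

Note that the service authenticates with a custom x-api-key header rather than OpenAI's Authorization: Bearer header, so pointing the official openai client at this base URL should only work when API_KEY is left unset on the server.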