@davidgilbertson
A function to embed texts with the OpenAI API, sending batches concurrently and respecting the API's limits on inputs per request and tokens per input.
from typing import Literal
import asyncio

import numpy as np
import tiktoken
from openai import AsyncOpenAI


def embed(
    texts: list[str],
    model: Literal[
        "text-embedding-3-small",
        "text-embedding-3-large",
    ] = "text-embedding-3-small",
    dimensions: int | None = None,
) -> np.ndarray:
    MAX_BATCH = 2048  # API limit: inputs per request
    MAX_TOKENS = 8191  # API limit: tokens per input

    client = AsyncOpenAI()

    # Tokenize each text and truncate to the per-input token limit.
    # We'll pass these token lists directly to the API instead of raw strings.
    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = [tokenizer.encode(t)[:MAX_TOKENS] for t in texts]

    # Fetch embeddings for a single batch
    async def embed_batch(**kwargs) -> list[list[float]]:
        response = await client.embeddings.create(**kwargs)
        return [r.embedding for r in response.data]

    # Split the inputs into batches and process them concurrently
    async def async_embed() -> np.ndarray:
        coros = []
        for i in range(0, len(tokens), MAX_BATCH):
            batch = tokens[i : i + MAX_BATCH]
            kwargs = dict(input=batch, model=model)
            if dimensions:
                kwargs["dimensions"] = dimensions
            coros.append(embed_batch(**kwargs))
        # gather preserves the order of the coroutines, so results stay aligned with the inputs
        responses = await asyncio.gather(*coros)
        # Flatten the per-batch results into one list of embeddings
        embeddings = [emb for response in responses for emb in response]
        return np.asarray(embeddings)

    return asyncio.run(async_embed())
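
A minimal usage sketch, assuming OPENAI_API_KEY is set in the environment; the sample texts and dimensions=256 are illustrative:

# Usage sketch — assumes OPENAI_API_KEY is set in the environment.
vectors = embed(
    ["The cat sat on the mat", "A feline rested on the rug"],
    model="text-embedding-3-small",
    dimensions=256,  # optional: shorter vectors (supported by the -3 models)
)
print(vectors.shape)  # (2, 256)
# The API returns unit-length vectors, so a dot product gives cosine similarity
sim = float(vectors[0] @ vectors[1])

Note that asyncio.run() raises a RuntimeError when called from a thread that already has a running event loop (e.g. a Jupyter notebook); in that context you would await the batches directly rather than calling embed().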