Skip to content

Instantly share code, notes, and snippets.

@cmpute
Last active October 23, 2025 07:17
Show Gist options
  • Select an option

  • Save cmpute/cd2ea3b047592839ee2f80ecd1581ff3 to your computer and use it in GitHub Desktop.

Select an option

Save cmpute/cd2ea3b047592839ee2f80ecd1581ff3 to your computer and use it in GitHub Desktop.
OpenAI-compatible VLM API calling example
import httpx, base64, argparse, json
# Base address of the OpenAI-compatible server this script talks to.
SERVER_URL = "http://localhost:8000"
# Endpoint queried for the list of models.
MODELS_URL = f"{SERVER_URL}/v1/models"
# Endpoint that receives the chat-completion request.
CHAT_URL = f"{SERVER_URL}/v1/chat/completions"
# Fetched once when the module is loaded: the 'id' of every entry in the
# 'data' list of the models response.
available_models = [
    entry['id']
    for entry in httpx.get(MODELS_URL).json()['data']
]
def payload(model: str, prompt: str, img_base64: list, temperature: float, max_tokens: int, **kwargs):
    """Assemble the JSON body for a single-turn chat-completion request.

    The user message always starts with the text prompt; each entry of
    ``img_base64`` follows as an ``image_url`` part wrapped in a
    ``data:image/jpeg;base64,`` URL.

    Args:
        model: Identifier placed in the ``model`` field.
        prompt: Text of the user message.
        img_base64: Base64-encoded image strings, or a falsy value
            (``None`` / empty) for a text-only request.
        temperature: Forwarded unchanged in the ``temperature`` field.
        max_tokens: Forwarded unchanged in the ``max_tokens`` field.
        **kwargs: Accepted and ignored, so callers may splat a larger
            argument namespace into this function.

    Returns:
        A dict ready to be sent as the JSON request body.
    """
    text_part = {"type": "text", "text": prompt}
    image_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }
        for encoded in (img_base64 or ())
    ]
    user_message = {
        "content": [text_part, *image_parts],
        "role": "user",
    }

    # Apart from model, messages, temperature and max_tokens, every field
    # below is a fixed setting.
    return {
        "model": model,
        "messages": [user_message],
        "temperature": temperature,
        "top_p": 1,
        "tools": None,
        "tool_choice": "none",
        "logprobs": False,
        "top_logprobs": 0,
        "n": 1,
        "max_tokens": max_tokens,
        "stop": None,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "user": "string",
        "repetition_penalty": 1,
        "session_id": -1,
        "ignore_eos": False,
        "skip_special_tokens": True,
        "top_k": 40,
    }
# Command-line entry point: parse the arguments, base64-encode any images,
# send one chat-completion request, and print the model's reply.
parser = argparse.ArgumentParser(description="Call VLMs.")
parser.add_argument('model', type=str, help='Model to be called. Available models:' + str(available_models))
# nargs='?' makes the positional optional; without it argparse requires the
# argument and the default below is never used.
parser.add_argument('prompt', type=str, nargs='?', default='Describe this image')
parser.add_argument('--image', type=str, nargs='+', help='Path to the image')
parser.add_argument('--timeout', type=float, default=10.0, help='Timeout for inference')
parser.add_argument('-t', '--temperature', type=float, default=0.7)
parser.add_argument('-m', '--max_tokens', type=int, default=None)
args = parser.parse_args()

# Validate with parser.error instead of assert: asserts are stripped under
# `python -O`, while parser.error prints the usage text and exits.
if args.model not in available_models:
    parser.error("Invalid model! Choose from:" + str(available_models))

if args.image:
    img_base64 = []
    for image in args.image:
        with open(image, 'rb') as image_file:
            img_base64.append(base64.b64encode(image_file.read()).decode())
else:
    img_base64 = None

# Pass only the fields payload() consumes, rather than splatting the whole
# argparse namespace and relying on **kwargs to swallow the rest.
content = payload(
    model=args.model,
    prompt=args.prompt,
    img_base64=img_base64,
    temperature=args.temperature,
    max_tokens=args.max_tokens,
)
response = httpx.post(CHAT_URL, json=content, timeout=args.timeout)
print("=" * 10 + f" {response} " + "=" * 10)
# Surface an HTTP error status directly instead of failing later with a
# KeyError on 'choices' when the server rejected the request.
response.raise_for_status()
print(response.json()['choices'][0]['message']['content'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment