Last active
October 23, 2025 07:17
-
-
Save cmpute/cd2ea3b047592839ee2f80ecd1581ff3 to your computer and use it in GitHub Desktop.
OpenAI-compatible VLM API calling example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import base64
import json

import httpx

# Base URL of the local OpenAI-compatible inference server.
SERVER_URL = "http://localhost:8000"
MODELS_URL = SERVER_URL + "/v1/models"
CHAT_URL = SERVER_URL + "/v1/chat/completions"

# Ask the server once, at startup, which model ids it can serve.
available_models = [model_info['id'] for model_info in httpx.get(MODELS_URL).json()['data']]
def payload(model: str, prompt: str, img_base64: list, temperature: float, max_tokens: int, **kwargs):
    """Build an OpenAI-compatible /v1/chat/completions request body.

    The user message always starts with a text part containing *prompt*;
    each base64 string in *img_base64* (if any) is appended as a JPEG
    data-URL image part. Extra keyword arguments (e.g. leftover argparse
    fields) are accepted and ignored.
    """
    parts = [{
        "type": "text",
        "text": prompt
    }]
    for encoded in (img_base64 or []):
        parts.append({
            "type": "image_url",
            "image_url": { "url": f"data:image/jpeg;base64,{encoded}" }
        })
    # Remaining fields mirror the server's full request schema; most are
    # left at their schema defaults.
    return {
        "model": model,
        "messages": [
            {
                "content": parts,
                "role": "user"
            }
        ],
        "temperature": temperature,
        "top_p": 1,
        "tools": None,
        "tool_choice": "none",
        "logprobs": False,
        "top_logprobs": 0,
        "n": 1,
        "max_tokens": max_tokens,
        "stop": None,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "user": "string",
        "repetition_penalty": 1,
        "session_id": -1,
        "ignore_eos": False,
        "skip_special_tokens": True,
        "top_k": 40
    }
# Command-line interface: positional model + prompt, optional images and tuning.
parser = argparse.ArgumentParser(description="Call VLMs.")
parser.add_argument('model', type=str, help='Model to be called. Available models:' + str(available_models))
# nargs='?' is required for a default to take effect on a positional argument;
# without it argparse treats 'prompt' as mandatory and the default is dead code.
parser.add_argument('prompt', type=str, nargs='?', default='Describe this image')
parser.add_argument('--image', type=str, nargs='+', help='Path to the image')
parser.add_argument('--timeout', type=float, default=10.0, help='Timeout for inference')
parser.add_argument('-t', '--temperature', type=float, default=0.7)
parser.add_argument('-m', '--max_tokens', type=int, default=None)
args = parser.parse_args()
# Validate the requested model via parser.error rather than assert:
# asserts are stripped under `python -O`, and parser.error gives a clean
# usage message plus a non-zero exit instead of a traceback.
if args.model not in available_models:
    parser.error("Invalid model! Choose from:" + str(available_models))

# Read each image file and base64-encode it for embedding as a data URL.
if args.image:
    img_base64 = []
    for image in args.image:
        with open(image, 'rb') as image_file:
            img_base64.append(base64.b64encode(image_file.read()).decode())
else:
    img_base64 = None
# Build and send the chat request. vars(args) also carries 'image' and
# 'timeout', which payload() absorbs via **kwargs.
content = payload(img_base64=img_base64, **vars(args))
response = httpx.post(CHAT_URL, json=content, timeout=args.timeout)
print("=" * 10 + f" {response} " + "=" * 10)
# Fail loudly on HTTP errors; without this a 4xx/5xx body would surface
# as a confusing KeyError on 'choices' below.
response.raise_for_status()
print(response.json()['choices'][0]['message']['content'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment