Skip to content

Instantly share code, notes, and snippets.

@vroomfondel
Last active June 2, 2025 10:41
Show Gist options
  • Save vroomfondel/eb1fb4ac3319b22f9dc3d9ff658ce0c9 to your computer and use it in GitHub Desktop.
import json
from datetime import datetime
from typing import Optional

import httpx
import instructor
from httpx import ByteStream
from loguru import logger
from openai import OpenAI
from pydantic import BaseModel
HOST: str = "http://127.0.0.1:11434"
OLLAMA_MODEL: str = "hermes3:8b-llama3.1-fp16"
class InstructorOpenAIOllamaOverride:  # metaclass=Singleton):
    """Bridge an `instructor`/OpenAI client onto a local Ollama server.

    Installs httpx event hooks that (a) rewrite each outgoing OpenAI-style
    chat-completion request into Ollama's native ``/api/chat`` format and
    (b) rewrite Ollama's reply back into an OpenAI ``chat.completion``
    payload, so `instructor` can parse it unchanged.
    """

    logger = logger.bind(classname=__qualname__)

    def __init__(
        self,
        host: str,
        options: dict,
        print_request: bool,
        print_response: bool,
        print_http_request: bool,
        print_http_response: bool,
        think_flag: bool | None = None,
    ):
        """Store the per-instance settings consumed by the httpx hooks.

        :param host: base URL of the Ollama server (e.g. http://127.0.0.1:11434)
        :param options: Ollama generation options forwarded in the request body
        :param print_request / print_response: debug-log the JSON payloads
        :param print_http_request / print_http_response: debug-log HTTP details
        :param think_flag: if not None, sent as Ollama's "think" field
        """
        self.host = host
        self.options = options
        self.print_request = print_request
        self.print_response = print_response
        # BUGFIX: the original line ended with a stray comma, which stored a
        # one-element tuple and made `if self.print_http_request:` always truthy.
        self.print_http_request = print_http_request
        self.print_http_response = print_http_response
        self.think_flag = think_flag

    @classmethod
    def get_instructor_client(
        cls,
        host: str = _HOST,
        options: dict | None = None,
        print_request: bool = False,
        print_response: bool = False,
        print_http_request: bool = False,
        print_http_response: bool = False,
        think_flag: bool | None = None,
    ) -> instructor.Instructor:
        """Build an instructor client whose httpx transport is hooked to Ollama.

        :returns: an ``instructor.Instructor`` in JSON mode whose every request
            is rewritten by :meth:`modify_request` and every response by
            :meth:`modify_response`.
        """
        if options is None:
            options = _OLLAMA_OPTIONS
        ioao: InstructorOpenAIOllamaOverride = InstructorOpenAIOllamaOverride(
            host=host,
            options=options,
            print_request=print_request,
            print_response=print_response,
            print_http_request=print_http_request,
            print_http_response=print_http_response,
            think_flag=think_flag,
        )
        meclient = httpx.Client(
            event_hooks={
                "request": [
                    # partial(cls.modify_request, host=host, options=options)
                    ioao.modify_request
                ],
                "response": [
                    # partial(cls.modify_respone, host=host, options=options)
                    ioao.modify_response
                ],
            }
        )
        client: instructor.Instructor = instructor.from_openai(
            OpenAI(
                http_client=meclient,
                # Deliberately bogus path: modify_request must rewrite the URL
                # to {host}/api/chat; if the hook did not run, the call fails
                # loudly instead of silently hitting a wrong endpoint.
                base_url=f"{host}/BLARGHNOTEXISTFAILCHECK",
                api_key="ollama",  # required by the OpenAI client, but unused
            ),
            mode=instructor.Mode.JSON,
        )
        return client

    def modify_request(self, request: httpx.Request) -> None:
        """httpx request hook: rewrite an OpenAI chat request for Ollama.

        Parses the outgoing JSON body, rebuilds it in Ollama ``/api/chat``
        shape (model/messages/options/think/format), re-encodes the httpx
        request stream in place, and repoints the URL at ``/api/chat``.
        """
        logger = self.__class__.logger
        if self.print_http_request:
            logger.debug(f"\nRequest event hook MODIFY: {request.method} {request.url} - Waiting for response")
            logger.debug(f"{type(request)=} {request=}")
            logger.debug(f"{type(request._content)=} {request.content=}")
            logger.debug(f"{type(request.stream)=} {request.stream=}")

        post_content: dict = json.loads(request.content)
        if self.print_request:
            logger.debug(f"REQ_CONTENT_OLD_PARSED: {json.dumps(post_content, indent=2, sort_keys=False, default=str)}")

        post_content_new: dict = {
            "model": post_content["model"],
            "messages": post_content["messages"],
            "tools": [],
            "stream": False,
            "options": self.options,
            "keep_alive": 300,
        }
        if self.think_flag is not None:
            post_content_new["think"] = self.think_flag
        if _OLLAMA_FORMAT_REQUEST:
            post_content_new["format"] = _OLLAMA_FORMAT_REQUEST

        request.json = post_content_new  # just to be sure...

        # rebuilding stream — mirrors httpx internals:
        # .venv/lib/python3.12/site-packages/httpx/_transports/default.py
        content_type: str | None = request.headers.get("content-type")
        headers, stream = httpx._content.encode_request(
            content=None,
            data=None,
            files=None,
            json=post_content_new,
            boundary=httpx._models.get_multipart_boundary_from_content_type(
                content_type=content_type.encode(request.headers.encoding)
                if content_type
                else None
            ),
        )
        request.headers["Content-Length"] = headers["Content-Length"]
        request._prepare(headers)
        request.stream = stream
        # Drop the cached body so httpx re-reads it from the new stream.
        request.__delattr__("_content")
        if isinstance(stream, ByteStream):
            # Load the request body, except for streaming content.
            request.read()
        # /rebuilding stream
        # import traceback
        # traceback.print_stack()

        if self.print_request:
            logger.debug(f"REQ_CONTENT_NEW_PARSED: {json.dumps(post_content_new, indent=2, sort_keys=False, default=str)}")
        if self.print_http_request:
            logger.debug(f"REQ_OLD {type(request.url)=} {request.url=}")
        # URL rewrite is unconditional — it must happen even when HTTP
        # debug-logging is off. could be necessary to actually check chat-mode ?!
        request.url = httpx.URL(f"{self.host}/api/chat")
        if self.print_http_request:
            logger.debug(f"REQ_NEW {type(request.url)=} {request.url=}")

    def modify_response(self, response: httpx.Response) -> None:
        """httpx response hook: disguise an Ollama reply as an OpenAI one.

        Reads the Ollama ``/api/chat`` JSON and overwrites the response body
        with an OpenAI ``chat.completion``-shaped payload (id/choices/usage),
        so the OpenAI client and instructor can parse it.
        """
        logger = self.__class__.logger
        response.read()  # force the body so .content is populated
        resp_content: dict = json.loads(response.content)
        if self.print_response:
            logger.debug(f"RESPONSE_CONTENT BEFORE:\n{json.dumps(resp_content, indent=2, sort_keys=False, default=str)}")

        # Unix timestamp derived from Ollama's ISO-8601 "created_at".
        uni: int = int(datetime.fromisoformat(resp_content["created_at"]).timestamp())
        mimic_openai: dict = {
            "id": f"rambling-{uni}",
            "object": "chat.completion",
            "created": uni,
            "model": resp_content["model"],
            "system_fingerprint": "thumb",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": resp_content["message"]["content"],
                    },
                    "finish_reason": resp_content["done_reason"],
                }
            ],
            "usage": {
                "prompt_tokens": resp_content["prompt_eval_count"],
                "completion_tokens": resp_content["eval_count"],
                "total_tokens": resp_content["prompt_eval_count"] + resp_content["eval_count"],
            },
        }
        # Surface Ollama's "thinking" trace the way OpenAI-compatible servers do.
        if self.think_flag and "thinking" in resp_content["message"]:
            mimic_openai["choices"][0]["message"]["reasoning_content"] = resp_content["message"]["thinking"]

        response._content = json.dumps(mimic_openai).encode()
        if self.print_response:
            logger.debug(f"RESPONSE_CONTENT MODIFIED:\n{json.dumps(mimic_openai, indent=2, sort_keys=False, default=str)}")
def main() -> None:
    """Smoke test: ask the hooked client to extract a structured User object.

    Requires a running Ollama server at ``_HOST`` serving ``_OLLAMA_MODEL``.
    """
    # NOTE: redundant local `from pydantic import BaseModel` removed — it is
    # already imported at module level.
    class User(BaseModel):
        name: str
        age: int

    instructor_client = InstructorOpenAIOllamaOverride.get_instructor_client(
        host=_HOST,
        options=_OLLAMA_OPTIONS,
        think_flag=True,
        print_request=True,
        print_response=True,
    )
    resp, comp = instructor_client.create_with_completion(
        stream=False,
        model=_OLLAMA_MODEL,
        messages=[
            {"role": "user", "content": "Create a user with a funny name"}
        ],
        response_model=User,
    )
    logger.debug(f"{type(comp)=}")
    logger.debug(comp.model_dump_json(indent=2))
    logger.debug(f"{type(resp)=}")
    logger.debug(resp.model_dump_json(indent=2))


if __name__ == "__main__":
    main()
@vroomfondel
Copy link
Author

This might not work anymore 😞

INFO:     127.0.0.1:57816 - "POST /generate/response HTTP/1.1" 500 Internal Server Error
ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/home/pln/.virtualenvs/conv/lib/python3.12/site-packages/instructor/retry.py", line 161, in retry_sync
    response = func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
  File "/home/pln/.virtualenvs/conv/lib/python3.12/site-packages/openai/_utils/_utils.py", line 274, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
TypeError: Completions.create() got an unexpected keyword argument 'http_client'

Sorry, I cannot replicate your error.

I am using these versions — which ones are you using?

pip list|egrep '(^openai|instructor)'
instructor                               1.7.2
openai                                   1.64.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment