from typing import Any, Dict, List, Optional
import uuid
from pprint import pprint

from llama_index.core.callbacks.base_handler import BaseCallbackHandler
from llama_index.core.callbacks import CBEventType, EventPayload

"""
For recovering metadata from the 'raw' part of the ChatCompletion.
"""

# noinspection PyAbstractClass
class CostLoggingCallback(BaseCallbackHandler):
    """Logs token usage & cost for each LLM completion."""

    def __init__(
        self,
        print_output: bool = False,
        provider: Optional[str] = None,
        event_starts_to_ignore: Optional[List[CBEventType]] = None,
        event_ends_to_ignore: Optional[List[CBEventType]] = None,
    ):
        super().__init__(
            event_starts_to_ignore or [],
            event_ends_to_ignore or [],
        )
        self.print_output = print_output
        self.provider = provider
        self.records = []  # optional: store logs here
    def on_llm_end(self, output, **kwargs):
        """Pull model/usage/cost metadata out of the completion's 'raw' payload."""
        raw = getattr(output, "raw", None)

        def g(obj, *path, default=None):
            """Walk nested attributes or dict keys; return default if any step is missing."""
            for key in path:
                if obj is None:
                    return default
                if hasattr(obj, key):
                    obj = getattr(obj, key)
                elif isinstance(obj, dict) and key in obj:
                    obj = obj[key]
                else:
                    return default
            return obj

        data = {
            "model": g(raw, "model"),
            "provider": g(raw, "provider"),
            "prompt_tokens": g(raw, "usage", "prompt_tokens"),
            "completion_tokens": g(raw, "usage", "completion_tokens"),
            "total_tokens": g(raw, "usage", "total_tokens"),
            "cost": g(raw, "usage", "cost"),
            "id": g(raw, "id"),
            "created_at": g(raw, "created_at"),
        }
        if data["id"] is None:
            data["id"] = str(uuid.uuid4())
        if self.provider == "ollama":
            data["provider"] = "Ollama"

        # Do whatever you want with the info:
        if self.print_output:
            print("LLM Usage:")
            pprint(data)
        self.records.append(data)
    # ----------------------------------------------------------------------
    # REQUIRED ABSTRACT METHODS — provide no-op implementations
    # ----------------------------------------------------------------------
    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """Required by BaseCallbackHandler; safe no-op."""
        # print(f'CostLoggingCallback: start_trace called: {trace_id}')
        pass

    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None,
    ) -> None:
        """Required by BaseCallbackHandler; safe no-op."""
        # print('CostLoggingCallback: end_trace called')
        # pprint(trace_map)
        # print('--- end of end_trace ---')
        pass
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        parent_id: str = "",
        **kwargs: Any,
    ) -> str:
        """Required by BaseCallbackHandler; logging only."""
        payload = payload or {}
        print('---- on_event_start -----')
        oes = payload.get(EventPayload.PROMPT)
        if oes:
            print(f"Prompt: {oes}")
        else:
            for k in payload.keys():
                print(f"on_event_start key={k}")
        print('---- end of on_event_start -----')
        # print('CostLoggingCallback: on_event_start called')
        return event_id
    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any,
    ) -> None:
        payload = payload or {}
        print(f'-> on_event_end: {event_type}')
        completion = payload.get(EventPayload.COMPLETION)
        chunks = payload.get(CBEventType.CHUNKING)
        if completion:
            # pprint(result)
            self.on_llm_end(output=completion)
        elif chunks:
            print(f"on_event_end: caught {event_type}")
            print("=-" * 10)
        else:
            for k in payload.keys():
                print(f"on_event_end: caught {k}")
# But when I use the same callback with a more sophisticated llama_index workflow, as shown below,
# it seems to capture only the 'CHUNKS' being sent to the model and none of the results of the
# underlying LLM calls. At the end, the 'records' list stored in the callback handler is still empty,
# because 'on_event_end' never seems to be triggered for the LLM events -- for example,
# "print(f'-> on_event_end: {event_type}')" is never printed for a CBEventType.LLM event.
# The code executes and performs the task correctly; the callbacks just don't fire.

# a callback handler
cost_meta = CostLoggingCallback(print_output=False, provider=LLM_PROVIDER)
user_llm = ...  # established using the same code as in the working example below
routing = ...   # also established the same way
Settings.callback_manager = CallbackManager([cost_meta])

summary_engine = summary_index.as_query_engine(
    llm=user_llm,
    response_mode=ResponseMode.TREE_SUMMARIZE,
    use_async=True,
)
summary_tool = ...   # QueryEngineTool wrapping summary_engine (definition elided in the original snippet)
semantic_tool = ...  # similarly defined, also using 'user_llm'
selector = ...       # correctly defined

router_engine = RouterQueryEngine.from_defaults(
    selector=selector,
    llm=routing,
    query_engine_tools=[semantic_tool, summary_tool],
)
query_result = await router_engine.aquery(query)
logger.glau(f"cost callback:{cost_meta.records}")
# When I run the above code, all I end up capturing is the 'CHUNKING' event, and the 'records' list that
# should hold an entry for each call sent to the OpenRouter provider is empty. 'on_event_end' never seems
# to fire for the LLM calls -- see the sketch after the captured output below.
"""
---- on_event_start -----
on_event_start key=EventPayload.CHUNKS
---- end of on_event_start -----
-> on_event_end: CBEventType.CHUNKING
on_event_end: caught EventPayload.CHUNKS
---- on_event_start -----
on_event_start key=EventPayload.CHUNKS
---- end of on_event_start -----
-> on_event_end: CBEventType.CHUNKING
on_event_end: caught EventPayload.CHUNKS
2025-12-09 16:46:52,256 - GLAU - execute_query: await finished generating main query_result p_id: 1763670638 Q#: 1
2025-12-09 16:46:52,256 - GLAU - cost callback:[]
"""
# The CostLoggingCallback works as expected with this example
cost_meta = CostLoggingCallback(print_output=False, provider=LLM_PROVIDER)
## -snip- code here that sets up llm as an OpenRouter object
r = llm.complete('Hello.')
print('from callback')
pprint(cost_meta.records)
# outputs the following
"""
---- on_event_start -----
Prompt: Hello.
---- end of on_event_start -----
-> on_event_end: CBEventType.LLM
Time to complete openrouter call: 1.4543
from callback
[{'completion_tokens': 216,
'cost': 0.0005204,
'created_at': None,
'id': 'gen-1765326535-6Ro84KCBEnQtQiVAtdQd',
'model': 'qwen/qwen3-30b-a3b-thinking-2507',
'prompt_tokens': 10,
'provider': 'Alibaba',
'total_tokens': 226}]
"""