Created
December 10, 2025 00:56
-
-
Save filmo/e51d363feba69e0a25a90e7cf3eda4cf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import List, Optional, Dict, Any | |
| from llama_index.core.callbacks.base_handler import BaseCallbackHandler | |
| from llama_index.core.callbacks import CBEventType, EventPayload | |
| from pprint import pprint | |
| import uuid | |
| """ | |
| For recovering metadata from the 'raw' part of the ChatCompletion | |
| """ | |
# noinspection PyAbstractClass
class CostLoggingCallback(BaseCallbackHandler):
    """Logs token usage & cost for each LLM completion.

    Pulls metadata (model, token counts, cost, generation id) out of the
    provider's raw ChatCompletion object attached to the LLM response and
    accumulates one dict per completion in ``self.records``.

    Args:
        print_output: When True, pprint each usage record as it is logged.
        provider: Provider tag supplied by the caller; the literal string
            ``"ollama"`` is special-cased because Ollama responses carry no
            provider field of their own.
        event_starts_to_ignore: Event types to skip on start (default: none).
        event_ends_to_ignore: Event types to skip on end (default: none).
    """

    def __init__(
        self,
        print_output: bool = False,
        provider=False,
        event_starts_to_ignore: Optional[List[CBEventType]] = None,
        event_ends_to_ignore: Optional[List[CBEventType]] = None,
    ):
        # Avoid mutable defaults; normalize None -> [] for the base class.
        super().__init__(
            event_starts_to_ignore or [],
            event_ends_to_ignore or [],
        )
        self.print_output = print_output
        self.provider = provider
        # One dict per completion; see on_llm_end for the schema.
        self.records: List[Dict[str, Any]] = []

    def on_llm_end(self, output, **kwargs):
        """Extract usage/cost metadata from ``output.raw`` and record it.

        ``output.raw`` may be an object (attribute access) or a dict
        (key access) depending on the provider, so lookup handles both.
        """
        raw = getattr(output, "raw", None)

        def g(obj, *path, default=None):
            # Walk a mixed attribute/key path; return `default` on any miss
            # (including a None anywhere along the way).
            for key in path:
                if obj is None:
                    return default
                if hasattr(obj, key):
                    obj = getattr(obj, key)
                elif isinstance(obj, dict) and key in obj:
                    obj = obj[key]
                else:
                    return default
            return obj

        data = {
            "model": g(raw, "model"),
            "provider": g(raw, "provider"),
            "prompt_tokens": g(raw, "usage", "prompt_tokens"),
            "completion_tokens": g(raw, "usage", "completion_tokens"),
            "total_tokens": g(raw, "usage", "total_tokens"),
            "cost": g(raw, "usage", "cost"),
            "id": g(raw, "id"),
            "created_at": g(raw, "created_at"),
        }
        if data['id'] is None:
            # Some providers return no generation id; synthesize one so
            # every record is individually addressable.
            data['id'] = str(uuid.uuid4())
        if self.provider == "ollama":
            # Ollama responses carry no provider field; label them explicitly.
            data['provider'] = 'Ollama'
        if self.print_output:
            print("LLM Usage:")
            pprint(data)
        self.records.append(data)

    # ----------------------------------------------------------------------
    # REQUIRED ABSTRACT METHODS — provide no-op implementations
    # ----------------------------------------------------------------------
    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """Required by BaseCallbackHandler; safe no-op."""

    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None,
    ) -> None:
        """Required by BaseCallbackHandler; safe no-op."""

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        parent_id: str = "",
        **kwargs: Any,
    ):
        """Debug hook: print the prompt (or the payload keys) for each event."""
        # BUG FIX: payload is Optional — events may arrive with payload=None,
        # which previously crashed on payload.get(...).
        payload = payload or {}
        print('---- on_event_start -----')
        oes = payload.get(EventPayload.PROMPT)
        if oes:
            print(f"Prompt: {oes}")
        else:
            for k in payload.keys():
                print(f"on_event_start key={k}")
        print('---- end of on_event_start -----')

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any,
    ):
        """Debug hook: route completion payloads to :meth:`on_llm_end`."""
        # BUG FIX: payload is Optional — guard before .get()/.keys().
        payload = payload or {}
        print(f'-> on_event_end: {event_type}')
        completion = payload.get(EventPayload.COMPLETION)
        # BUG FIX: payload keys are EventPayload members, not CBEventType —
        # the original looked up CBEventType.CHUNKING and always missed
        # (the transcript shows chunk events falling into the else branch).
        chunks = payload.get(EventPayload.CHUNKS)
        if completion:
            self.on_llm_end(output=completion)
        elif chunks:
            print(f"on_event_end: caught {event_type}")
            print("=-" * 10)
        else:
            for k in payload.keys():
                print(f"on_event_end: caught {k}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # but when I use the same callback with a more sophisticated llama_index workflow as shown below | |
| # it seems to only capture 'CHUNKS' being sent to the model but none of the results of the underlying LLM calls | |
| # at the end, the 'records' stored in the callback handler is still empty because it seems | |
| # like on_event_end is never triggered. For example the "print(f'-> on_event_end: {event_type}')" is never printed. | |
| # The code executes and performs the task correctly, but it seems like the callbacks just don't execute. | |
| # a callback handler | |
| cost_meta = CostLoggingCallback(print_output=False, provider=LLM_PROVIDER) | |
| user_llm = # established using same code as in working example | |
| routing = # also established the same way. | |
| Settings.callback_manager = CallbackManager([cost_meta]) | |
| summary_engine = summary_index.as_query_engine( | |
| llm=user_llm, | |
| response_mode=ResponseMode.TREE_SUMMARIZE, | |
| use_async=True, | |
| ) | |
| semantic_tool = (similarly defined with 'user_llm') | |
| selector = (correctly defined) | |
| router_engine = RouterQueryEngine.from_defaults( | |
| selector=selector, | |
| llm=routing, | |
| query_engine_tools=[semantic_tool, summary_tool], | |
| ) | |
| query_result = await router_engine.aquery(query) | |
| logger.glau(f"cost callback:{cost_meta.records}") | |
| # When I run the above code, all I end up capturing is the 'CHUNKING' event and the 'records' that should hold a list of each | |
| # event sent to the OpenRouter provider is empty. The 'on_event_end' event never seems to fire | |
| """ | |
| ---- on_event_start ----- | |
| on_event_start key=EventPayload.CHUNKS | |
| ---- end of on_event_start ----- | |
| -> on_event_end: CBEventType.CHUNKING | |
| on_event_end: caught EventPayload.CHUNKS | |
| ---- on_event_start ----- | |
| on_event_start key=EventPayload.CHUNKS | |
| ---- end of on_event_start ----- | |
| -> on_event_end: CBEventType.CHUNKING | |
| on_event_end: caught EventPayload.CHUNKS | |
| 2025-12-09 16:46:52,256 - GLAU - execute_query: await finished generating main query_result p_id: 1763670638 Q#: 1 | |
| 2025-12-09 16:46:52,256 - GLAU - cost callback:[] | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # The CostLoggingCallback works as expected with this example | |
| cost_meta = CostLoggingCallback(print_output=False, provider=LLM_PROVIDER) | |
| ## -snip- code here that sets up llm as an OpenRouter object | |
| r = llm.complete('Hello.') | |
| print('from callback') | |
| pprint(cost_meta.records) | |
| # outputs the following | |
| """ | |
| ---- on_event_start ----- | |
| Prompt: Hello. | |
| ---- end of on_event_start ----- | |
| -> on_event_end: CBEventType.LLM | |
| Time to complete openrouter call: 1.4543 | |
| from callback | |
| [{'completion_tokens': 216, | |
| 'cost': 0.0005204, | |
| 'created_at': None, | |
| 'id': 'gen-1765326535-6Ro84KCBEnQtQiVAtdQd', | |
| 'model': 'qwen/qwen3-30b-a3b-thinking-2507', | |
| 'prompt_tokens': 10, | |
| 'provider': 'Alibaba', | |
| 'total_tokens': 226}] | |
| """ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment