Created
October 27, 2025 18:48
-
-
Save Fashad-Ahmed/5161b30a76e13614754dd220df5dde16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Example implementation of proper tokenization for Llama and Gemini | |
| * This file shows how to implement Option 4 with full model-specific tokenization | |
| * | |
| * To use this: | |
| * 1. Install required dependencies: npm install @huggingface/tokenizers | |
| * 2. Replace the current token-counter.util.ts with this implementation | |
| * 3. Update all countTokens calls to handle async properly | |
| */ | |
| import { encoding_for_model, get_encoding, type Tiktoken } from 'tiktoken'; | |
| import { Model } from '../../debate/enums/model.enum'; | |
| // import { Tokenizer } from '@huggingface/tokenizers'; // Uncomment when ready | |
/**
 * Minimal tokenizer shape the Llama counting path relies on: an `encode` call
 * whose (possibly awaited) result exposes a `length`.
 *
 * NOTE(review): this mirrors how the code below uses the tokenizer. The real
 * Hugging Face tokenizer may return a richer object (e.g. with an `ids`
 * array) — confirm and adapt in `getLlamaTokenizer` when it is enabled.
 */
interface LlamaTokenizerLike {
  encode(text: string): ArrayLike<unknown> | Promise<ArrayLike<unknown>>;
}

/**
 * Static helpers for counting tokens, checking context limits and truncating
 * text per model.
 *
 * - Llama and Gemini go through dedicated async paths that currently fall
 *   back to a character-based estimate.
 * - Every other model is counted with a cached tiktoken encoding. All of them
 *   use the `gpt-4` encoding (or `cl100k_base` for unknown models), so counts
 *   for non-OpenAI models are presumably approximations — verify if exact
 *   numbers matter.
 */
export class TokenCounter {
  /** Average characters per token assumed by the character-based fallback. */
  private static readonly FALLBACK_CHARS_PER_TOKEN = 4;

  /** Marker appended to every truncated string. */
  private static readonly ELLIPSIS = '...';

  /** tiktoken encoders keyed by model; released in `cleanup()`. */
  private static readonly encodingCache = new Map<string, Tiktoken>();

  // private static llamaTokenizer: Tokenizer | null = null;
  // private static geminiTokenizer: any = null;

  /**
   * Load the Llama tokenizer from Hugging Face.
   *
   * The real loader is still commented out, so this always resolves to `null`
   * and callers use the fallback estimate.
   *
   * @returns The tokenizer, or `null` when none is available.
   */
  private static async getLlamaTokenizer(): Promise<LlamaTokenizerLike | null> {
    // const { Tokenizer } = await import('@huggingface/tokenizers');
    // if (!this.llamaTokenizer) {
    //   this.llamaTokenizer = await Tokenizer.from_pretrained("meta-llama/Llama-4");
    // }
    // return this.llamaTokenizer;
    // For now, return null to use fallback
    return null;
  }

  /**
   * Count tokens for Llama models with the dedicated tokenizer when one is
   * available, otherwise with the character-based estimate.
   *
   * @param text - Text to measure.
   * @returns Token count; never rejects (tokenizer errors are logged).
   */
  private static async countLlamaTokens(text: string): Promise<number> {
    try {
      const tokenizer = await this.getLlamaTokenizer();
      if (tokenizer) {
        const encoded = await tokenizer.encode(text);
        return encoded.length;
      }
    } catch (error: unknown) {
      console.warn('Llama tokenization failed, using fallback:', error);
    }
    // Fallback to approximation
    return this.fallbackTokenCount(text);
  }

  /**
   * Count tokens for Gemini models.
   *
   * No local tokenizer is wired in, so this is always the character-based
   * estimate. It stays async so a real tokenizer can be added later without
   * touching callers.
   *
   * @param text - Text to measure.
   * @returns Estimated token count.
   */
  private static async countGeminiTokens(text: string): Promise<number> {
    return this.fallbackTokenCount(text);
  }

  /**
   * Return the cached tiktoken encoding for a model, creating it on first use.
   *
   * @param model - Model whose encoding is requested.
   * @returns The encoding, or `null` for Llama/Gemini (handled by the async
   *   paths) and when tiktoken fails to build the encoding (logged).
   */
  private static getEncodingForModel(model: Model): Tiktoken | null {
    const cached = this.encodingCache.get(model);
    if (cached) {
      return cached;
    }

    try {
      let encoding: Tiktoken;
      switch (model) {
        case Model.Llama_4:
        case Model.Gemini_Pro:
          // These are handled by the async methods; null triggers that path.
          return null;
        case Model.GPT_5_mini:
        case Model.GPT_4o_mini:
        case Model.Grok_4:
        case Model.Claude_3_5_Sonnet:
        case Model.Claude_3_5_Haiku:
        case Model.DeepSeek_V3:
          encoding = encoding_for_model('gpt-4');
          break;
        default:
          encoding = get_encoding('cl100k_base');
      }
      this.encodingCache.set(model, encoding);
      return encoding;
    } catch (error: unknown) {
      console.warn(`Failed to get encoding for model ${model}:`, error);
      return null;
    }
  }

  /**
   * Count tokens with tiktoken, degrading to the character-based estimate
   * when no encoding exists for the model or encoding throws.
   */
  private static countWithTiktoken(text: string, model: Model): number {
    try {
      const encoding = this.getEncodingForModel(model);
      if (encoding) {
        return encoding.encode(text).length;
      }
    } catch (error: unknown) {
      console.warn(`Token counting failed for model ${model}:`, error);
    }
    // Fallback to character-based estimation
    return this.fallbackTokenCount(text);
  }

  /**
   * Count the tokens of `text` for `model`.
   *
   * @param text - Text to measure; empty text counts as 0.
   * @param model - Target model (defaults to GPT-4o mini).
   * @returns Token count (an estimate for Llama, Gemini and on tiktoken
   *   failure).
   */
  static async countTokens(
    text: string,
    model: Model = Model.GPT_4o_mini,
  ): Promise<number> {
    if (!text) return 0;

    // Handle models that require async tokenization
    if (model === Model.Llama_4) {
      return this.countLlamaTokens(text);
    }
    if (model === Model.Gemini_Pro) {
      return this.countGeminiTokens(text);
    }

    return this.countWithTiktoken(text, model);
  }

  /**
   * Synchronous variant of `countTokens`, kept for backward compatibility.
   * Llama and Gemini always get the character-based estimate here.
   *
   * @param text - Text to measure; empty text counts as 0.
   * @param model - Target model (defaults to GPT-4o mini).
   * @returns Token count or estimate.
   */
  static countTokensSync(
    text: string,
    model: Model = Model.GPT_4o_mini,
  ): number {
    if (!text) return 0;
    // getEncodingForModel yields null for Llama/Gemini, which selects the
    // fallback estimate inside countWithTiktoken.
    return this.countWithTiktoken(text, model);
  }

  /**
   * Sum the token counts of several texts (counted independently, in
   * parallel).
   *
   * @param texts - Texts to measure.
   * @param model - Target model (defaults to GPT-4o mini).
   * @returns Total token count; 0 for an empty array.
   */
  static async countTokensForArray(
    texts: string[],
    model: Model = Model.GPT_4o_mini,
  ): Promise<number> {
    const counts = await Promise.all(
      texts.map((text) => this.countTokens(text, model)),
    );
    return counts.reduce((sum, count) => sum + count, 0);
  }

  /**
   * Count the tokens of a value's `JSON.stringify` representation.
   *
   * @param obj - Value to serialize.
   * @param model - Target model (defaults to GPT-4o mini).
   * @returns Token count; 0 when the value has no JSON form (e.g.
   *   `undefined`).
   * @throws Whatever `JSON.stringify` throws (e.g. on circular structures).
   */
  static async countTokensForJSON(
    obj: unknown,
    model: Model = Model.GPT_4o_mini,
  ): Promise<number> {
    // JSON.stringify returns undefined for values such as `undefined` or
    // functions even though its declared type is `string`.
    const serialized: string | undefined = JSON.stringify(obj);
    return this.countTokens(serialized ?? '', model);
  }

  /**
   * Shorten `text` so it fits in `maxTokens`, appending `...` when it had to
   * be cut.
   *
   * @param text - Text to shorten.
   * @param maxTokens - Token budget; fractional values are floored.
   * @param model - Target model (defaults to GPT-4o mini).
   * @returns `text` unchanged when it already fits, `''` when the budget is
   *   not positive, otherwise a truncated string. Llama, Gemini and tiktoken
   *   failures use character-based truncation.
   */
  static async truncateToTokenLimit(
    text: string,
    maxTokens: number,
    model: Model = Model.GPT_4o_mini,
  ): Promise<string> {
    const limit = Math.floor(maxTokens);
    // Also catches NaN. A non-positive budget must not reach `slice`, where a
    // negative end index would keep almost the whole text.
    if (!(limit > 0)) return '';

    const currentTokens = await this.countTokens(text, model);
    if (currentTokens <= limit) return text;

    // For async models, use fallback truncation
    if (model === Model.Llama_4 || model === Model.Gemini_Pro) {
      return this.fallbackTruncate(text, limit);
    }

    // For other models, use tiktoken truncation
    try {
      const encoding = this.getEncodingForModel(model);
      if (encoding) {
        const tokens = encoding.encode(text);
        if (tokens.length <= limit) return text;
        return this.truncateEncoded(encoding, tokens, limit);
      }
    } catch (error: unknown) {
      console.warn(`Token truncation failed:`, error);
    }
    return this.fallbackTruncate(text, limit);
  }

  /**
   * Build the longest prefix of `tokens` that, together with the ellipsis,
   * encodes to at most `limit` tokens.
   */
  private static truncateEncoded(
    encoding: Tiktoken,
    tokens: Uint32Array,
    limit: number,
  ): string {
    const decoder = new TextDecoder();
    // tiktoken's decode() yields UTF-8 bytes, not a string. A cut can land in
    // the middle of a multi-byte character, which decodes to U+FFFD; drop
    // those trailing replacement characters.
    const decodePrefix = (count: number): string =>
      decoder.decode(encoding.decode(tokens.slice(0, count))).replace(/\uFFFD+$/, '');

    const ellipsisCost = encoding.encode(this.ELLIPSIS).length;
    if (ellipsisCost > limit) {
      // No room for the marker itself: hard cut without it.
      return decodePrefix(limit);
    }

    let keep = limit - ellipsisCost;
    let candidate = decodePrefix(keep) + this.ELLIPSIS;
    // Re-encoding can tokenize the seam differently, so verify the budget and
    // back off token by token if it is exceeded.
    while (keep > 0 && encoding.encode(candidate).length > limit) {
      keep -= 1;
      candidate = decodePrefix(keep) + this.ELLIPSIS;
    }
    return candidate;
  }

  /**
   * Estimate tokens as `ceil(chars / 4)` plus a 10% safety margin, rounded
   * up.
   */
  private static fallbackTokenCount(text: string): number {
    const baseTokens = Math.ceil(text.length / this.FALLBACK_CHARS_PER_TOKEN);
    // Integer arithmetic: `baseTokens * 1.1` picks up floating-point error
    // (10 * 1.1 === 11.000000000000002) that Math.ceil turns into an extra
    // token.
    return Math.ceil((baseTokens * 11) / 10);
  }

  /**
   * Character-based truncation: keep 90% of the estimated character budget
   * and append the ellipsis. Returns `''` for a non-positive budget.
   */
  private static fallbackTruncate(text: string, maxTokens: number): string {
    if (!(maxTokens > 0)) return '';
    const targetChars = Math.floor(
      maxTokens * this.FALLBACK_CHARS_PER_TOKEN * 0.9,
    );
    return text.slice(0, targetChars) + this.ELLIPSIS;
  }

  /**
   * Context-window size, in tokens, configured for a model.
   *
   * @param model - Model to look up.
   * @returns The limit; unknown models get 128000.
   */
  static getTokenLimit(model: Model): number {
    switch (model) {
      case Model.Claude_3_5_Sonnet:
      case Model.Claude_3_5_Haiku:
        return 200000;
      case Model.Gemini_Pro:
        return 1000000;
      case Model.GPT_5_mini:
      case Model.GPT_4o_mini:
      case Model.Grok_4:
      case Model.DeepSeek_V3:
      case Model.Llama_4:
      default:
        return 128000;
    }
  }

  /**
   * Whether `text` fits in the model's token limit.
   *
   * @param text - Text to check.
   * @param model - Target model (defaults to GPT-4o mini).
   */
  static async fitsWithinLimit(
    text: string,
    model: Model = Model.GPT_4o_mini,
  ): Promise<boolean> {
    const tokens = await this.countTokens(text, model);
    return tokens <= this.getTokenLimit(model);
  }

  /**
   * Share of the model's token limit that `text` consumes, as a percentage
   * (may exceed 100).
   *
   * @param text - Text to check.
   * @param model - Target model (defaults to GPT-4o mini).
   */
  static async getTokenUsagePercentage(
    text: string,
    model: Model = Model.GPT_4o_mini,
  ): Promise<number> {
    const tokens = await this.countTokens(text, model);
    const limit = this.getTokenLimit(model);
    return (tokens / limit) * 100;
  }

  /**
   * Release every cached tiktoken encoder and empty the cache.
   *
   * The encoders are WASM-backed and are not reclaimed by the garbage
   * collector, so each one is freed explicitly. Encoders are recreated lazily
   * on the next count.
   */
  static cleanup(): void {
    for (const encoding of this.encodingCache.values()) {
      try {
        encoding.free();
      } catch (error: unknown) {
        // Keep going so one bad encoder does not leak the others.
        console.warn('Failed to free tiktoken encoding:', error);
      }
    }
    this.encodingCache.clear();
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment