Created
February 17, 2023 19:03
-
-
Save timheuer/36e941b6ea601ea4ee8be2c285ca4490 to your computer and use it in GitHub Desktop.
Helper function that estimates the number of OpenAI tokens in a string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Heuristic helpers for approximating how many tokens a piece of text will
/// occupy, without running a real tokenizer.
/// </summary>
internal static class TokenHelper
{
    // Ratios used by the two estimates below. They appear to follow the common
    // rule of thumb of roughly 3/4 of a word, or 4 characters, per token;
    // confirm against the target model's tokenizer if accuracy matters.
    private const double WordsPerToken = 0.75;
    private const double CharsPerToken = 4.0;

    /// <summary>
    /// Estimates the token count of <paramref name="text"/> from its word
    /// count and its character count, and returns the larger of the two.
    /// </summary>
    /// <param name="text">The text to measure. May be null or empty.</param>
    /// <returns>
    /// 0 when <paramref name="text"/> is null or empty; otherwise the larger of
    /// ceil(words / 0.75) and ceil(characters / 4).
    /// </returns>
    public static int EstimateTokenSize(this string text)
    {
        // Nothing to tokenize: avoid a NullReferenceException on a null
        // receiver and avoid reporting a phantom word for the empty string.
        if (string.IsNullOrEmpty(text))
        {
            return 0;
        }

        int wordCount = CountWords(text);
        int charCount = text.Length;

        int tokensByWords = (int)Math.Ceiling(wordCount / WordsPerToken);
        int tokensByChars = (int)Math.Ceiling(charCount / CharsPerToken);

        // Take the larger value so the estimate errs on the high side.
        return Math.Max(tokensByWords, tokensByChars);
    }

    // Counts maximal runs of non-whitespace characters. Consecutive, leading,
    // or trailing whitespace (spaces, tabs, newlines) never produces an empty
    // "word", and no intermediate array is allocated.
    private static int CountWords(string text)
    {
        int count = 0;
        bool insideWord = false;

        foreach (char c in text)
        {
            if (char.IsWhiteSpace(c))
            {
                insideWord = false;
            }
            else if (!insideWord)
            {
                insideWord = true;
                count++;
            }
        }

        return count;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment