Created
June 13, 2024 20:19
-
-
Save calebdre/bb0e31ba487ad9f5528bc77779ee217a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { getEncoding } from "js-tiktoken"; | |
/**
 * Packs the functions of the given files into text chunks that each stay
 * within `maxTokens` tokens of the `cl100k_base` encoding.
 *
 * Every function is rendered as `<path>/<filename>\n<source>\n\n` and appended
 * to the chunk in progress while it still fits; otherwise that chunk is closed
 * and a new one is started. Functions are never split: one that exceeds
 * `maxTokens` on its own is skipped with a console warning (the chunk in
 * progress is still closed at that point).
 *
 * @param files - Files to process; entries without `functions` are ignored.
 * @param maxTokens - Upper bound on tokens per chunk, measured before trimming.
 * @returns The chunks in input order, each trimmed of surrounding whitespace.
 */
function chunkFunctions(files: FileInfo[], maxTokens: number): string[] {
  const chunks: string[] = [];
  const encoder = getEncoding('cl100k_base');

  let currentChunk = '';
  // Running token total of `currentChunk`, kept so the accumulated text does
  // not have to be re-tokenized for every function (which made the work
  // quadratic in the chunk size). Every entry ends in "\n\n", which the
  // cl100k_base pre-tokenizer treats as a split point, so per-entry counts add
  // up to the count of the concatenation.
  // NOTE(review): assumes `file.path` never starts with a line break — confirm.
  let currentTokens = 0;

  for (const file of files) {
    if (!file.functions) continue;

    for (const func of file.functions) {
      const formattedFunction = `${file.path}/${file.filename}\n${func.function}\n\n`;
      const functionTokens = encoder.encode(formattedFunction).length;

      // Fits next to what has been collected so far: just append.
      if (currentTokens + functionTokens <= maxTokens) {
        currentChunk += formattedFunction;
        currentTokens += functionTokens;
        continue;
      }

      // Does not fit: close the chunk in progress (if any) first.
      if (currentChunk !== '') {
        chunks.push(currentChunk.trim());
        currentChunk = '';
        currentTokens = 0;
      }

      if (functionTokens <= maxTokens) {
        // The function fits on its own, so it opens the next chunk.
        currentChunk = formattedFunction;
        currentTokens = functionTokens;
      } else {
        console.warn(`Function "${func.functionName}" in file "${file.path}/${file.filename}" exceeds the maximum token limit and will be skipped.`);
      }
    }
  }

  // Emit whatever is left in the final, partially filled chunk.
  if (currentChunk !== '') {
    chunks.push(currentChunk.trim());
  }

  return chunks;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment