Skip to content

Instantly share code, notes, and snippets.

@calebdre
Created June 13, 2024 20:19
Show Gist options
  • Select an option

  • Save calebdre/bb0e31ba487ad9f5528bc77779ee217a to your computer and use it in GitHub Desktop.

Select an option

Save calebdre/bb0e31ba487ad9f5528bc77779ee217a to your computer and use it in GitHub Desktop.
import { getEncoding } from "js-tiktoken";
/**
 * Packs the functions of the given files into text chunks that each stay
 * within a token budget.
 *
 * Every function is rendered as `<path>/<filename>\n<source>\n\n` and appended
 * to the current chunk while the budget allows; otherwise the current chunk is
 * closed and a new one is started. Tokens are counted with the `cl100k_base`
 * encoding.
 *
 * @param files - Files to read functions from; files without `functions` are skipped.
 * @param maxTokens - Maximum number of tokens allowed per chunk.
 * @returns The chunks in input order, each trimmed of surrounding whitespace.
 *   A function whose own entry exceeds `maxTokens` is left out of the result
 *   and reported through `console.warn`.
 */
function chunkFunctions(files: FileInfo[], maxTokens: number): string[] {
  const chunks: string[] = [];
  const encoder = getEncoding('cl100k_base');

  let currentChunk = '';
  // Running token total of `currentChunk`, maintained as the sum of its
  // entries' token counts so the growing chunk is never re-encoded (each entry
  // is tokenized exactly once).
  // NOTE(review): this treats token counts as additive across entries, the
  // same approximation the budget check already made for the incoming entry.
  let currentTokens = 0;

  // Closes the pending chunk (if it has any content) and resets the accumulator.
  const flush = (): void => {
    if (currentChunk !== '') {
      chunks.push(currentChunk.trim());
    }
    currentChunk = '';
    currentTokens = 0;
  };

  for (const file of files) {
    if (!file.functions) continue;

    for (const func of file.functions) {
      const formattedFunction = `${file.path}/${file.filename}\n${func.function}\n\n`;
      const functionTokens = encoder.encode(formattedFunction).length;

      // The entry fits into the pending chunk: append and move on.
      if (currentTokens + functionTokens <= maxTokens) {
        currentChunk += formattedFunction;
        currentTokens += functionTokens;
        continue;
      }

      // The entry does not fit, so the pending chunk is closed first. This
      // also happens when the entry turns out to be oversized and is skipped.
      flush();

      if (functionTokens <= maxTokens) {
        // Start a fresh chunk with this entry.
        currentChunk = formattedFunction;
        currentTokens = functionTokens;
      } else {
        console.warn(`Function "${func.functionName}" in file "${file.path}/${file.filename}" exceeds the maximum token limit and will be skipped.`);
      }
    }
  }

  // Emit whatever is still pending after the last function.
  flush();
  return chunks;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment