Last active
August 5, 2024 03:32
-
-
Save Attunewise/f961bb233e09e636016fde225c32f122 to your computer and use it in GitHub Desktop.
Counts ChatGPT tokens
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { GPT4Tokenizer } = require('gpt4-tokenizer') | |
const json = require('./conversations.json') | |
const tokenizer = new GPT4Tokenizer({type:'gpt-3'}) | |
// Collect every message-bearing node from each exported conversation.
// A conversation is walked from its `current_node` back through the `parent`
// links, so within one conversation nodes are appended leaf-first.
const nodes = [];
for (const conversation of json) {
  const { mapping, current_node: leafId } = conversation;
  for (let cursor = leafId; cursor; cursor = mapping[cursor].parent) {
    const entry = mapping[cursor];
    // Nodes without a `message` are traversed but not collected.
    if (entry.message) {
      nodes.push(entry);
    }
  }
}
/**
 * Render a Unix timestamp as a UTC calendar date string.
 *
 * @param {number} epoch - Seconds since the Unix epoch (fractions are allowed).
 * @returns {string} The UTC date as `YYYY-MM-DD`, with month and day
 *   zero-padded to two digits.
 */
function formatEpochToDate(epoch) {
  const twoDigits = (value) => String(value).padStart(2, '0');
  const instant = new Date(epoch * 1000); // Date expects milliseconds
  return [
    instant.getUTCFullYear(),
    twoDigits(instant.getUTCMonth() + 1), // getUTCMonth() is zero-based
    twoDigits(instant.getUTCDate()),
  ].join('-');
}
/**
 * Bucket message nodes by the UTC calendar day of `message.create_time`.
 *
 * Nodes that have no `message`, or whose `create_time` is not a finite
 * number (e.g. `null` or missing), are skipped. Without this check a `null`
 * timestamp is coerced to 0 and produces a spurious `1970-01-01` bucket
 * (and `undefined` a `NaN-NaN-NaN` bucket), which inflates the number of
 * days that callers divide by.
 *
 * @param {Array<{message?: {create_time?: number}}>} items - Nodes to group.
 * @returns {Array<Array<object>>} One array of nodes per distinct day, in the
 *   order each day is first encountered in `items`.
 */
function groupItemsByDate(items) {
  const groups = new Map();
  for (const item of items) {
    const createTime = item && item.message ? item.message.create_time : undefined;
    // Number.isFinite is false for null, undefined, NaN and non-numbers.
    if (!Number.isFinite(createTime)) {
      continue;
    }
    const date = formatEpochToDate(createTime);
    const bucket = groups.get(date);
    if (bucket) {
      bucket.push(item);
    } else {
      groups.set(date, [item]);
    }
  }
  return [...groups.values()];
}
// Order the collected nodes by message creation time, ascending.
nodes.sort((earlier, later) => earlier.message.create_time - later.message.create_time);
// Group the items by date
const groupedItems = groupItemsByDate(nodes);

// Running totals. "output" is text authored by the assistant; "input" is text
// from every other author role.
let input = 0;
let output = 0;
let inputChars = 0;
let outputChars = 0;
let maxInput = 0;
let maxOutput = 0;

for (const day of groupedItems) {
  for (const node of day) {
    const { content, author } = node.message;
    // Only `text` content is measured; a missing or non-array `parts` is
    // skipped rather than crashing on `.join`.
    if (content.content_type !== 'text' || !Array.isArray(content.parts)) {
      continue;
    }
    const text = content.parts.join('');
    if (!text) {
      continue;
    }
    const tokens = tokenizer.estimateTokenCount(text);
    if (author.role === 'assistant') {
      outputChars += text.length;
      output += tokens;
      maxOutput = Math.max(maxOutput, tokens);
    } else {
      inputChars += text.length;
      input += tokens;
      maxInput = Math.max(maxInput, tokens);
    }
  }
}

// Rounded per-day average; reports 0 instead of NaN when there are no days.
const dayCount = groupedItems.length;
const perDay = (total) => (dayCount === 0 ? 0 : Math.round(total / dayCount));

console.log({days: dayCount, input, output, inputChars, outputChars, maxInput, maxOutput});
console.log("daily tokens", perDay(input), perDay(output));
console.log("daily chars", perDay(inputChars), perDay(outputChars));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment