Skip to content

Instantly share code, notes, and snippets.

@adamgross
Created June 1, 2025 17:48
Show Gist options
  • Save adamgross/8b6587aa8d09b1a52d1dbafdd2c4b4b9 to your computer and use it in GitHub Desktop.
Save adamgross/8b6587aa8d09b1a52d1dbafdd2c4b4b9 to your computer and use it in GitHub Desktop.
// API Keys
const OPENAI_API_KEY = '';
// Sheet Names
const TRANSACTION_SHEET_NAME = 'Transactions';
const CATEGORY_SHEET_NAME = 'Categories';
// Column Names
const TRANSACTION_ID_COL_NAME = 'Transaction ID';
const ORIGINAL_DESCRIPTION_COL_NAME = 'Full Description';
const DESCRIPTION_COL_NAME = 'Description';
const CATEGORY_COL_NAME = 'Category';
const AI_AUTOCAT_COL_NAME = 'AI AutoCat'
const DATE_COL_NAME = 'Date';
// Fallback Transaction Category
const FALLBACK_CATEGORY = "Misc Unknown";
// Other Misc Parameters
const MAX_BATCH_SIZE = 75;
// Cache for similar transactions to avoid repeated queries
let similarTransactionsCache = new Map();
let allTransactionsData = null;
let categoryList = null;
function categorizeUncategorizedTransactions() {
const startTime = new Date();
// Clear cache at start of each run
similarTransactionsCache.clear();
allTransactionsData = null;
categoryList = null;
const uncategorizedTransactions = getTransactionsToCategorize();
const numTxnsToCategorize = uncategorizedTransactions.length;
if (numTxnsToCategorize == 0) {
Logger.log("No uncategorized transactions found");
return;
}
Logger.log(`Found ${numTxnsToCategorize} transactions to categorize`);
// Pre-load all transaction data once
loadAllTransactionsData();
// Pre-load category list once
categoryList = getAllowedCategories();
Logger.log("Looking for historical similar transactions...");
const transactionList = [];
// Process in batches to find similar transactions more efficiently
for (let i = 0; i < uncategorizedTransactions.length; i++) {
const similarTransactions = findSimilarTransactionsOptimized(uncategorizedTransactions[i][1]);
transactionList.push({
'transaction_id': uncategorizedTransactions[i][0],
'original_description': uncategorizedTransactions[i][1],
'previous_transactions': similarTransactions
});
}
Logger.log("Processing transactions with OpenAI...");
const updatedTransactions = lookupDescAndCategory(transactionList, categoryList);
if (updatedTransactions != null) {
Logger.log("OpenAI returned suggested categories and descriptions");
Logger.log("Writing updated transactions into sheet...");
writeUpdatedTransactionsBatch(updatedTransactions, categoryList);
Logger.log("Finished updating sheet!");
}
const endTime = new Date();
Logger.log(`Total execution time: ${(endTime - startTime) / 1000} seconds`);
}
// Pre-load all transaction data to avoid repeated sheet reads
function loadAllTransactionsData() {
if (allTransactionsData !== null) return; // Already loaded
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(TRANSACTION_SHEET_NAME);
const headers = sheet.getRange("1:1").getValues()[0];
// Get all data at once
const lastRow = sheet.getLastRow();
if (lastRow <= 1) {
allTransactionsData = [];
return;
}
const allData = sheet.getRange(2, 1, lastRow - 1, headers.length).getValues();
// Create indexed data structure
allTransactionsData = {
headers: headers,
data: allData,
descColIndex: headers.indexOf(DESCRIPTION_COL_NAME),
origDescColIndex: headers.indexOf(ORIGINAL_DESCRIPTION_COL_NAME),
categoryColIndex: headers.indexOf(CATEGORY_COL_NAME),
dateColIndex: headers.indexOf(DATE_COL_NAME),
txnIdColIndex: headers.indexOf(TRANSACTION_ID_COL_NAME)
};
}
// Optimized version that works with pre-loaded data
function findSimilarTransactionsOptimized(originalDescription) {
const matchString = normalizeDescription(originalDescription);
// Check cache first
if (similarTransactionsCache.has(matchString)) {
return similarTransactionsCache.get(matchString);
}
if (!allTransactionsData) {
loadAllTransactionsData();
}
const previousTransactionList = [];
const { data, descColIndex, origDescColIndex, categoryColIndex, dateColIndex } = allTransactionsData;
// Search through pre-loaded data
for (let i = 0; i < data.length; i++) {
const row = data[i];
const category = row[categoryColIndex];
const origDesc = row[origDescColIndex];
const desc = row[descColIndex];
// Skip if no category
if (!category) continue;
// Check if description matches
const origDescLower = (origDesc || '').toString().toLowerCase();
const descLower = (desc || '').toString().toLowerCase();
if (origDescLower.includes(matchString) || descLower.includes(matchString)) {
previousTransactionList.push({
'original_description': origDesc,
'updated_description': desc,
'category': category,
'date': row[dateColIndex]
});
// Limit to 3 matches for performance
if (previousTransactionList.length >= 3) break;
}
}
// Sort by date (most recent first) if we have date info
previousTransactionList.sort((a, b) => {
if (!a.date || !b.date) return 0;
return new Date(b.date) - new Date(a.date);
});
// Remove date from final result
const result = previousTransactionList.slice(0, 3).map(tx => ({
'original_description': tx.original_description,
'updated_description': tx.updated_description,
'category': tx.category
}));
// Cache the result
similarTransactionsCache.set(matchString, result);
return result;
}
// Extract the normalization logic into a separate function
function normalizeDescription(originalDescription) {
let matchString = originalDescription.toLowerCase();
// Remove phone number placeholder
matchString = matchString.replace('xx', '#');
// Strip numbers at end
const descriptionParts = matchString.split('#');
matchString = descriptionParts[0];
// Remove unimportant words - use a more efficient approach
const wordsToRemove = [
'direct debit ', 'direct deposit ', 'zelle payment from ', 'bill payment ',
'dividend received ', 'debit card purchase ', 'sq *', 'sq*', 'tst *', 'tst*',
'in *', 'in*', 'tcb *', 'tcb*', 'dd *', 'dd*', 'py *', 'py*', 'p *', 'pp*',
'rx *', 'rx*', 'intuit *', 'intuit*', 'microsoft *', 'microsoft*'
];
wordsToRemove.forEach(word => {
matchString = matchString.replace(word, '');
});
matchString = matchString.replace('*', ' ');
matchString = matchString.trim();
matchString = matchString.replace(/\s+/g, ' ');
// Grab first 3 words
const parts = matchString.split(' ');
return parts.slice(0, Math.min(3, parts.length)).join(' ');
}
// Optimized batch write function
function writeUpdatedTransactionsBatch(transactionList, categoryList) {
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Transactions");
const headers = sheet.getRange("1:1").getValues()[0];
const descriptionColumnIndex = headers.indexOf(DESCRIPTION_COL_NAME);
const categoryColumnIndex = headers.indexOf(CATEGORY_COL_NAME);
const transactionIDColumnIndex = headers.indexOf(TRANSACTION_ID_COL_NAME);
const openAIFlagColIndex = headers.indexOf(AI_AUTOCAT_COL_NAME);
// Get all transaction IDs at once to create a lookup map
const lastRow = sheet.getLastRow();
const txnIdRange = sheet.getRange(2, transactionIDColumnIndex + 1, lastRow - 1, 1).getValues();
const txnIdToRowMap = new Map();
for (let i = 0; i < txnIdRange.length; i++) {
txnIdToRowMap.set(txnIdRange[i][0].toString(), i + 2); // +2 because of 0-based index and header row
}
// Prepare batch updates
const categoryUpdates = [];
const descriptionUpdates = [];
const flagUpdates = [];
for (const transaction of transactionList) {
const rowIndex = txnIdToRowMap.get(transaction.transaction_id.toString());
if (rowIndex) {
// Category update
let updatedCategory = transaction.category;
if (!categoryList.includes(updatedCategory)) {
updatedCategory = FALLBACK_CATEGORY;
}
categoryUpdates.push({
range: sheet.getRange(rowIndex, categoryColumnIndex + 1),
value: updatedCategory
});
// Description update (commented out in original, keeping same)
// descriptionUpdates.push({
// range: sheet.getRange(rowIndex, descriptionColumnIndex + 1),
// value: transaction.updated_description
// });
// AI flag update
if (openAIFlagColIndex >= 0) {
flagUpdates.push({
range: sheet.getRange(rowIndex, openAIFlagColIndex + 1),
value: "TRUE"
});
}
}
}
// Execute batch updates
try {
// Update categories
categoryUpdates.forEach(update => {
update.range.setValue(update.value);
});
// Update descriptions (if needed)
// descriptionUpdates.forEach(update => {
// update.range.setValue(update.value);
// });
// Update AI flags
flagUpdates.forEach(update => {
update.range.setValue(update.value);
});
} catch (error) {
Logger.log(`Batch update error: ${error}`);
}
}
// Optimized to get transactions more efficiently
function getTransactionsToCategorize() {
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(TRANSACTION_SHEET_NAME);
const headers = sheet.getRange("1:1").getValues()[0];
const txnIDColIndex = headers.indexOf(TRANSACTION_ID_COL_NAME);
const origDescColIndex = headers.indexOf(ORIGINAL_DESCRIPTION_COL_NAME);
const categoryColIndex = headers.indexOf(CATEGORY_COL_NAME);
if (txnIDColIndex === -1 || origDescColIndex === -1 || categoryColIndex === -1) {
Logger.log("Required columns not found");
return [];
}
const lastRow = sheet.getLastRow();
if (lastRow <= 1) return [];
const allData = sheet.getRange(2, 1, lastRow - 1, headers.length).getValues();
const uncategorizedTransactions = [];
for (let i = 0; i < allData.length && uncategorizedTransactions.length < MAX_BATCH_SIZE; i++) {
const row = allData[i];
const txnId = row[txnIDColIndex];
const origDesc = row[origDescColIndex];
const category = row[categoryColIndex];
// Check if has original description but no category
if (origDesc && !category) {
uncategorizedTransactions.push([txnId, origDesc]);
}
}
return uncategorizedTransactions;
}
// Cache categories to avoid repeated reads
function getAllowedCategories() {
if (categoryList !== null) return categoryList; // Use cached version
const spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
const categorySheet = spreadsheet.getSheetByName(CATEGORY_SHEET_NAME);
const headers = categorySheet.getRange("1:1").getValues()[0];
const categoryColIndex = headers.indexOf(CATEGORY_COL_NAME);
if (categoryColIndex === -1) {
Logger.log("Category column not found");
return [];
}
const lastRow = categorySheet.getLastRow();
if (lastRow <= 1) return [];
const categoryListRaw = categorySheet.getRange(2, categoryColIndex + 1, lastRow - 1, 1).getValues();
categoryList = categoryListRaw
.map(row => row[0])
.filter(category => category && category.toString().trim() !== "");
return categoryList;
}
function getColumnLetterFromColumnHeader(columnHeaders, columnName) {
const columnIndex = columnHeaders.indexOf(columnName);
let columnLetter = "";
let base = 26;
let letterCharCodeBase = 'A'.charCodeAt(0);
let index = columnIndex;
while (index >= 0) {
columnLetter = String.fromCharCode(index % base + letterCharCodeBase) + columnLetter;
index = Math.floor(index / base) - 1;
}
return columnLetter;
}
function lookupDescAndCategory(transactionList, categoryList, model = 'gpt-4o-mini') {
const transactionDict = {
"transactions": transactionList
};
const request = {
model: model,
temperature: 0.2,
top_p: 0.1,
seed: 1,
response_format: {"type": "json_object"},
messages: [
{
role: 'system',
content: 'Act as an API that categorizes and cleans up bank transaction descriptions for a personal finance app.'
},
{
role: 'system',
content: 'Reference the following list of allowed_categories:\n' + JSON.stringify(categoryList)
},
{
role: 'system',
content: `You will be given JSON input with a list of transaction descriptions and potentially related previously categorized transactions in the following format:
{"transactions": [
{
"transaction_id": "A unique ID for this transaction"
"original_description": "The original raw transaction description",
"previous_transactions": "(optional) Previously cleaned up transaction descriptions and the prior category used that may be related to this transaction"
}
]}
For each transaction provided, follow these instructions:
(0) If previous_transactions were provided, see if the current transaction matches a previous one closely. If it does, use the updated_description and category of the previous transaction exactly, including capitalization and punctuation.
(1) If there is no matching previous_transaction, or none was provided suggest a better "updated_description" according to the following rules:
(a) Use all of your knowledge and information to propose a friendly, human readable updated_description for the transaction given the original_description. The input often contains the name of a merchant name. If you know of a merchant it might be referring to, use the name of that merchant for the suggested description.
(b) Keep the suggested description as simple as possible. Remove punctuation, extraneous numbers, location information, abbreviations such as "Inc." or "LLC", IDs and account numbers.
(2) For each original_description, suggest a "category" for the transaction from the allowed_categories list that was provided.
(3) If you are not confident in the suggested category after using your own knowledge and the previous transactions provided, use the category "${FALLBACK_CATEGORY}"
(4) Your response should be a JSON object and no other text. The response object should be of the form:
{"suggested_transactions": [
{
"transaction_id": "The unique ID previously provided for this transaction",
"updated_description": "The cleaned up version of the description",
"category": "A category selected from the allowed_categories list"
}
]}`
},
{
role: 'user',
content: JSON.stringify(transactionDict)
}
]
};
const jsonRequest = JSON.stringify(request);
const options = {
method: 'POST',
contentType: 'application/json',
headers: {'Authorization': 'Bearer ' + OPENAI_API_KEY},
payload: jsonRequest,
muteHttpExceptions: true,
};
try {
const response = UrlFetchApp.fetch("https://api.openai.com/v1/chat/completions", options).getContentText();
const parsedResponse = JSON.parse(response);
if ("error" in parsedResponse) {
Logger.log("Error from OpenAI: " + parsedResponse["error"]["message"]);
return null;
} else {
const apiResponse = JSON.parse(parsedResponse["choices"][0]["message"]["content"]);
return apiResponse["suggested_transactions"];
}
} catch (error) {
Logger.log("Error in OpenAI API call: " + error);
return null;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment