Created
June 1, 2025 17:48
-
-
Save adamgross/8b6587aa8d09b1a52d1dbafdd2c4b4b9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// API Keys | |
const OPENAI_API_KEY = ''; | |
// Sheet Names | |
const TRANSACTION_SHEET_NAME = 'Transactions'; | |
const CATEGORY_SHEET_NAME = 'Categories'; | |
// Column Names | |
const TRANSACTION_ID_COL_NAME = 'Transaction ID'; | |
const ORIGINAL_DESCRIPTION_COL_NAME = 'Full Description'; | |
const DESCRIPTION_COL_NAME = 'Description'; | |
const CATEGORY_COL_NAME = 'Category'; | |
const AI_AUTOCAT_COL_NAME = 'AI AutoCat' | |
const DATE_COL_NAME = 'Date'; | |
// Fallback Transaction Category | |
const FALLBACK_CATEGORY = "Misc Unknown"; | |
// Other Misc Parameters | |
const MAX_BATCH_SIZE = 75; | |
// Cache for similar transactions to avoid repeated queries | |
let similarTransactionsCache = new Map(); | |
let allTransactionsData = null; | |
let categoryList = null; | |
function categorizeUncategorizedTransactions() { | |
const startTime = new Date(); | |
// Clear cache at start of each run | |
similarTransactionsCache.clear(); | |
allTransactionsData = null; | |
categoryList = null; | |
const uncategorizedTransactions = getTransactionsToCategorize(); | |
const numTxnsToCategorize = uncategorizedTransactions.length; | |
if (numTxnsToCategorize == 0) { | |
Logger.log("No uncategorized transactions found"); | |
return; | |
} | |
Logger.log(`Found ${numTxnsToCategorize} transactions to categorize`); | |
// Pre-load all transaction data once | |
loadAllTransactionsData(); | |
// Pre-load category list once | |
categoryList = getAllowedCategories(); | |
Logger.log("Looking for historical similar transactions..."); | |
const transactionList = []; | |
// Process in batches to find similar transactions more efficiently | |
for (let i = 0; i < uncategorizedTransactions.length; i++) { | |
const similarTransactions = findSimilarTransactionsOptimized(uncategorizedTransactions[i][1]); | |
transactionList.push({ | |
'transaction_id': uncategorizedTransactions[i][0], | |
'original_description': uncategorizedTransactions[i][1], | |
'previous_transactions': similarTransactions | |
}); | |
} | |
Logger.log("Processing transactions with OpenAI..."); | |
const updatedTransactions = lookupDescAndCategory(transactionList, categoryList); | |
if (updatedTransactions != null) { | |
Logger.log("OpenAI returned suggested categories and descriptions"); | |
Logger.log("Writing updated transactions into sheet..."); | |
writeUpdatedTransactionsBatch(updatedTransactions, categoryList); | |
Logger.log("Finished updating sheet!"); | |
} | |
const endTime = new Date(); | |
Logger.log(`Total execution time: ${(endTime - startTime) / 1000} seconds`); | |
} | |
// Pre-load all transaction data to avoid repeated sheet reads | |
function loadAllTransactionsData() { | |
if (allTransactionsData !== null) return; // Already loaded | |
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(TRANSACTION_SHEET_NAME); | |
const headers = sheet.getRange("1:1").getValues()[0]; | |
// Get all data at once | |
const lastRow = sheet.getLastRow(); | |
if (lastRow <= 1) { | |
allTransactionsData = []; | |
return; | |
} | |
const allData = sheet.getRange(2, 1, lastRow - 1, headers.length).getValues(); | |
// Create indexed data structure | |
allTransactionsData = { | |
headers: headers, | |
data: allData, | |
descColIndex: headers.indexOf(DESCRIPTION_COL_NAME), | |
origDescColIndex: headers.indexOf(ORIGINAL_DESCRIPTION_COL_NAME), | |
categoryColIndex: headers.indexOf(CATEGORY_COL_NAME), | |
dateColIndex: headers.indexOf(DATE_COL_NAME), | |
txnIdColIndex: headers.indexOf(TRANSACTION_ID_COL_NAME) | |
}; | |
} | |
// Optimized version that works with pre-loaded data | |
function findSimilarTransactionsOptimized(originalDescription) { | |
const matchString = normalizeDescription(originalDescription); | |
// Check cache first | |
if (similarTransactionsCache.has(matchString)) { | |
return similarTransactionsCache.get(matchString); | |
} | |
if (!allTransactionsData) { | |
loadAllTransactionsData(); | |
} | |
const previousTransactionList = []; | |
const { data, descColIndex, origDescColIndex, categoryColIndex, dateColIndex } = allTransactionsData; | |
// Search through pre-loaded data | |
for (let i = 0; i < data.length; i++) { | |
const row = data[i]; | |
const category = row[categoryColIndex]; | |
const origDesc = row[origDescColIndex]; | |
const desc = row[descColIndex]; | |
// Skip if no category | |
if (!category) continue; | |
// Check if description matches | |
const origDescLower = (origDesc || '').toString().toLowerCase(); | |
const descLower = (desc || '').toString().toLowerCase(); | |
if (origDescLower.includes(matchString) || descLower.includes(matchString)) { | |
previousTransactionList.push({ | |
'original_description': origDesc, | |
'updated_description': desc, | |
'category': category, | |
'date': row[dateColIndex] | |
}); | |
// Limit to 3 matches for performance | |
if (previousTransactionList.length >= 3) break; | |
} | |
} | |
// Sort by date (most recent first) if we have date info | |
previousTransactionList.sort((a, b) => { | |
if (!a.date || !b.date) return 0; | |
return new Date(b.date) - new Date(a.date); | |
}); | |
// Remove date from final result | |
const result = previousTransactionList.slice(0, 3).map(tx => ({ | |
'original_description': tx.original_description, | |
'updated_description': tx.updated_description, | |
'category': tx.category | |
})); | |
// Cache the result | |
similarTransactionsCache.set(matchString, result); | |
return result; | |
} | |
// Extract the normalization logic into a separate function | |
function normalizeDescription(originalDescription) { | |
let matchString = originalDescription.toLowerCase(); | |
// Remove phone number placeholder | |
matchString = matchString.replace('xx', '#'); | |
// Strip numbers at end | |
const descriptionParts = matchString.split('#'); | |
matchString = descriptionParts[0]; | |
// Remove unimportant words - use a more efficient approach | |
const wordsToRemove = [ | |
'direct debit ', 'direct deposit ', 'zelle payment from ', 'bill payment ', | |
'dividend received ', 'debit card purchase ', 'sq *', 'sq*', 'tst *', 'tst*', | |
'in *', 'in*', 'tcb *', 'tcb*', 'dd *', 'dd*', 'py *', 'py*', 'p *', 'pp*', | |
'rx *', 'rx*', 'intuit *', 'intuit*', 'microsoft *', 'microsoft*' | |
]; | |
wordsToRemove.forEach(word => { | |
matchString = matchString.replace(word, ''); | |
}); | |
matchString = matchString.replace('*', ' '); | |
matchString = matchString.trim(); | |
matchString = matchString.replace(/\s+/g, ' '); | |
// Grab first 3 words | |
const parts = matchString.split(' '); | |
return parts.slice(0, Math.min(3, parts.length)).join(' '); | |
} | |
// Optimized batch write function | |
function writeUpdatedTransactionsBatch(transactionList, categoryList) { | |
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Transactions"); | |
const headers = sheet.getRange("1:1").getValues()[0]; | |
const descriptionColumnIndex = headers.indexOf(DESCRIPTION_COL_NAME); | |
const categoryColumnIndex = headers.indexOf(CATEGORY_COL_NAME); | |
const transactionIDColumnIndex = headers.indexOf(TRANSACTION_ID_COL_NAME); | |
const openAIFlagColIndex = headers.indexOf(AI_AUTOCAT_COL_NAME); | |
// Get all transaction IDs at once to create a lookup map | |
const lastRow = sheet.getLastRow(); | |
const txnIdRange = sheet.getRange(2, transactionIDColumnIndex + 1, lastRow - 1, 1).getValues(); | |
const txnIdToRowMap = new Map(); | |
for (let i = 0; i < txnIdRange.length; i++) { | |
txnIdToRowMap.set(txnIdRange[i][0].toString(), i + 2); // +2 because of 0-based index and header row | |
} | |
// Prepare batch updates | |
const categoryUpdates = []; | |
const descriptionUpdates = []; | |
const flagUpdates = []; | |
for (const transaction of transactionList) { | |
const rowIndex = txnIdToRowMap.get(transaction.transaction_id.toString()); | |
if (rowIndex) { | |
// Category update | |
let updatedCategory = transaction.category; | |
if (!categoryList.includes(updatedCategory)) { | |
updatedCategory = FALLBACK_CATEGORY; | |
} | |
categoryUpdates.push({ | |
range: sheet.getRange(rowIndex, categoryColumnIndex + 1), | |
value: updatedCategory | |
}); | |
// Description update (commented out in original, keeping same) | |
// descriptionUpdates.push({ | |
// range: sheet.getRange(rowIndex, descriptionColumnIndex + 1), | |
// value: transaction.updated_description | |
// }); | |
// AI flag update | |
if (openAIFlagColIndex >= 0) { | |
flagUpdates.push({ | |
range: sheet.getRange(rowIndex, openAIFlagColIndex + 1), | |
value: "TRUE" | |
}); | |
} | |
} | |
} | |
// Execute batch updates | |
try { | |
// Update categories | |
categoryUpdates.forEach(update => { | |
update.range.setValue(update.value); | |
}); | |
// Update descriptions (if needed) | |
// descriptionUpdates.forEach(update => { | |
// update.range.setValue(update.value); | |
// }); | |
// Update AI flags | |
flagUpdates.forEach(update => { | |
update.range.setValue(update.value); | |
}); | |
} catch (error) { | |
Logger.log(`Batch update error: ${error}`); | |
} | |
} | |
// Optimized to get transactions more efficiently | |
function getTransactionsToCategorize() { | |
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(TRANSACTION_SHEET_NAME); | |
const headers = sheet.getRange("1:1").getValues()[0]; | |
const txnIDColIndex = headers.indexOf(TRANSACTION_ID_COL_NAME); | |
const origDescColIndex = headers.indexOf(ORIGINAL_DESCRIPTION_COL_NAME); | |
const categoryColIndex = headers.indexOf(CATEGORY_COL_NAME); | |
if (txnIDColIndex === -1 || origDescColIndex === -1 || categoryColIndex === -1) { | |
Logger.log("Required columns not found"); | |
return []; | |
} | |
const lastRow = sheet.getLastRow(); | |
if (lastRow <= 1) return []; | |
const allData = sheet.getRange(2, 1, lastRow - 1, headers.length).getValues(); | |
const uncategorizedTransactions = []; | |
for (let i = 0; i < allData.length && uncategorizedTransactions.length < MAX_BATCH_SIZE; i++) { | |
const row = allData[i]; | |
const txnId = row[txnIDColIndex]; | |
const origDesc = row[origDescColIndex]; | |
const category = row[categoryColIndex]; | |
// Check if has original description but no category | |
if (origDesc && !category) { | |
uncategorizedTransactions.push([txnId, origDesc]); | |
} | |
} | |
return uncategorizedTransactions; | |
} | |
// Cache categories to avoid repeated reads | |
function getAllowedCategories() { | |
if (categoryList !== null) return categoryList; // Use cached version | |
const spreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
const categorySheet = spreadsheet.getSheetByName(CATEGORY_SHEET_NAME); | |
const headers = categorySheet.getRange("1:1").getValues()[0]; | |
const categoryColIndex = headers.indexOf(CATEGORY_COL_NAME); | |
if (categoryColIndex === -1) { | |
Logger.log("Category column not found"); | |
return []; | |
} | |
const lastRow = categorySheet.getLastRow(); | |
if (lastRow <= 1) return []; | |
const categoryListRaw = categorySheet.getRange(2, categoryColIndex + 1, lastRow - 1, 1).getValues(); | |
categoryList = categoryListRaw | |
.map(row => row[0]) | |
.filter(category => category && category.toString().trim() !== ""); | |
return categoryList; | |
} | |
function getColumnLetterFromColumnHeader(columnHeaders, columnName) { | |
const columnIndex = columnHeaders.indexOf(columnName); | |
let columnLetter = ""; | |
let base = 26; | |
let letterCharCodeBase = 'A'.charCodeAt(0); | |
let index = columnIndex; | |
while (index >= 0) { | |
columnLetter = String.fromCharCode(index % base + letterCharCodeBase) + columnLetter; | |
index = Math.floor(index / base) - 1; | |
} | |
return columnLetter; | |
} | |
function lookupDescAndCategory(transactionList, categoryList, model = 'gpt-4o-mini') { | |
const transactionDict = { | |
"transactions": transactionList | |
}; | |
const request = { | |
model: model, | |
temperature: 0.2, | |
top_p: 0.1, | |
seed: 1, | |
response_format: {"type": "json_object"}, | |
messages: [ | |
{ | |
role: 'system', | |
content: 'Act as an API that categorizes and cleans up bank transaction descriptions for a personal finance app.' | |
}, | |
{ | |
role: 'system', | |
content: 'Reference the following list of allowed_categories:\n' + JSON.stringify(categoryList) | |
}, | |
{ | |
role: 'system', | |
content: `You will be given JSON input with a list of transaction descriptions and potentially related previously categorized transactions in the following format: | |
{"transactions": [ | |
{ | |
"transaction_id": "A unique ID for this transaction" | |
"original_description": "The original raw transaction description", | |
"previous_transactions": "(optional) Previously cleaned up transaction descriptions and the prior category used that may be related to this transaction" | |
} | |
]} | |
For each transaction provided, follow these instructions: | |
(0) If previous_transactions were provided, see if the current transaction matches a previous one closely. If it does, use the updated_description and category of the previous transaction exactly, including capitalization and punctuation. | |
(1) If there is no matching previous_transaction, or none was provided suggest a better "updated_description" according to the following rules: | |
(a) Use all of your knowledge and information to propose a friendly, human readable updated_description for the transaction given the original_description. The input often contains the name of a merchant name. If you know of a merchant it might be referring to, use the name of that merchant for the suggested description. | |
(b) Keep the suggested description as simple as possible. Remove punctuation, extraneous numbers, location information, abbreviations such as "Inc." or "LLC", IDs and account numbers. | |
(2) For each original_description, suggest a "category" for the transaction from the allowed_categories list that was provided. | |
(3) If you are not confident in the suggested category after using your own knowledge and the previous transactions provided, use the category "${FALLBACK_CATEGORY}" | |
(4) Your response should be a JSON object and no other text. The response object should be of the form: | |
{"suggested_transactions": [ | |
{ | |
"transaction_id": "The unique ID previously provided for this transaction", | |
"updated_description": "The cleaned up version of the description", | |
"category": "A category selected from the allowed_categories list" | |
} | |
]}` | |
}, | |
{ | |
role: 'user', | |
content: JSON.stringify(transactionDict) | |
} | |
] | |
}; | |
const jsonRequest = JSON.stringify(request); | |
const options = { | |
method: 'POST', | |
contentType: 'application/json', | |
headers: {'Authorization': 'Bearer ' + OPENAI_API_KEY}, | |
payload: jsonRequest, | |
muteHttpExceptions: true, | |
}; | |
try { | |
const response = UrlFetchApp.fetch("https://api.openai.com/v1/chat/completions", options).getContentText(); | |
const parsedResponse = JSON.parse(response); | |
if ("error" in parsedResponse) { | |
Logger.log("Error from OpenAI: " + parsedResponse["error"]["message"]); | |
return null; | |
} else { | |
const apiResponse = JSON.parse(parsedResponse["choices"][0]["message"]["content"]); | |
return apiResponse["suggested_transactions"]; | |
} | |
} catch (error) { | |
Logger.log("Error in OpenAI API call: " + error); | |
return null; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment