Last active
March 3, 2019 04:05
-
-
Save trestletech/4384914 to your computer and use it in GitHub Desktop.
Categorize mint.com transactions. Transactions can be exported from Mint and reprocessed in R.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.Rproj.user | |
.Rhistory | |
.RData | |
transactions/ | |
budgets/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' CSV file of Mint.com categories. | |
cat <- read.csv(text="Auto & Transport, | |
,Auto Insurance | |
,Auto Payment | |
,Gas & Fuel | |
,Parking | |
,Public Transportation | |
,Service & Parts | |
,Tolls | |
Bills & Utilities, | |
,Home Phone | |
,Internet | |
,Mobile Phone | |
,Television | |
,Utilities | |
,Cable/Internet/Phone | |
Business Services, | |
,Advertising | |
,Legal | |
,Office Supplies | |
,Printing | |
,Shipping | |
Education, | |
,Books & Supplies | |
,Student Loan | |
,Tuition | |
Entertainment, | |
,Amusement | |
,Arts | |
,Movies & DVDs | |
,Music | |
,Newspapers & Magazines | |
Fees & Charges, | |
,ATM Fee | |
,Bank Fee | |
,Finance Charge | |
,Late Fee | |
,Service Fee | |
,Trade Commissions | |
Financial, | |
,Financial Advisor | |
,Life Insurance | |
Food & Dining, | |
,Alcohol & Bars | |
,Coffee Shops | |
,Fast Food | |
,Groceries | |
,Restaurants | |
Gifts & Donations, | |
,Charity | |
,Gift | |
,Charity & Gifts | |
,Tithe | |
Health & Fitness, | |
,Dentist | |
,Doctor | |
,Eyecare | |
,Gym | |
,Health Insurance | |
,Pharmacy | |
,Sports | |
Home, | |
,Furnishings | |
,Home Improvement | |
,Home Insurance | |
,Home Services | |
,Home Supplies | |
,Lawn & Garden | |
,Mortgage & Rent | |
Income, | |
,Bonus | |
,Interest Income | |
,Paycheck | |
,Reimbursement | |
,Rental Income | |
,Returned Purchase | |
Investments, | |
,Buy | |
,Deposit | |
,Dividend & Cap Gains | |
,Sell | |
,Withdrawal | |
Kids, | |
,Allowance | |
,Baby Supplies | |
,Babysitter & Daycare | |
,Child Support | |
,Kids Activities | |
,Toys | |
Loans, | |
,Loan Fees and Charges | |
,Loan Insurance | |
,Loan Interest | |
,Loan Payment | |
,Loan Principal | |
Personal Care, | |
,Hair | |
,Laundry | |
,Spa & Massage | |
,Jeff's Allowance | |
,Stacie's Allowance | |
Pets, | |
,Pet Food & Supplies | |
,Pet Grooming | |
,Veterinary | |
Shopping, | |
,Books | |
,Clothing | |
,Electronics & Software | |
,Hobbies | |
,Sporting Goods | |
Taxes, | |
,Federal Tax | |
,Local Tax | |
,Property Tax | |
,Sales Tax | |
,State Tax | |
Transfer, | |
,Credit Card Payment | |
,Transfer for Cash Spending | |
Travel, | |
,Air Travel | |
,Hotel | |
,Rental Car & Taxi | |
,Vacation | |
Uncategorized, | |
,Cash & ATM | |
,Check", header=FALSE) | |
#' Takes a two-column CSV in and converts it to a list. Assumes that the first column | |
#' is the first level of the list and the second column is the second level. Organizes | |
#' the list accordingly. | |
#' @param csv the data.frame described above | |
#' @return a list representing the two-column data.frame. | |
csvToList <- function(csv){ | |
trim <- function (x) gsub("^\\s+|\\s+$", "", x) | |
categories <- as.list(as.character(cat[trim(cat[,1]) != "",1])) | |
names(categories) <- trim(as.character(cat[trim(cat[,1]) != "",1])) | |
categories <- sapply(categories, assign, NULL) | |
thisCat <- "" | |
for (i in 1:nrow(cat)){ | |
if (trim(cat[i,1]) == ""){ | |
categories[[which(names(categories) == thisCat)]] <- c(categories[[which(names(categories) == thisCat)]], trim(cat[i,2])) | |
} else{ | |
thisCat <- trim(cat[i,1]) | |
} | |
} | |
categories | |
} | |
#' Find the specified category out of a two-level list and return the parent element | |
#' containing the given element, whether it's in the top level or the child level. | |
#' @param categories a two-level list of parent-child categories | |
#' @param categoryName the name of the category to be selected | |
#' @return the parent element containing the specified category name. | |
findCategory <- function(categories, categoryName){ | |
if (any(categoryName == names(categories))){ | |
return(categories[categoryName]) | |
} else{ | |
#Find the category within the list and remove the suffixed ID. | |
catName <- gsub("[[:digit:]]+$", "", names(unlist(categories))[unlist(categories) == categoryName]) | |
if(nchar(catName) == 0){ | |
#string not found | |
return(NULL) | |
} else{ | |
return(categories[catName]) | |
} | |
} | |
} | |
categories <- csvToList(cat) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Version: 1.0 | |
RestoreWorkspace: Yes | |
SaveWorkspace: Yes | |
AlwaysSaveHistory: Yes | |
EnableCodeIndexing: Yes | |
UseSpacesForTab: Yes | |
NumSpacesForTab: 2 | |
Encoding: UTF-8 | |
RnwWeave: Sweave | |
LaTeX: pdfLaTeX |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(httr) | |
#login | |
mintLogin <- function(username, password){ | |
if (nchar(password) > 16){ | |
warning("It seems that Mint trims passwords such as yours to only the first 16 characters. If you're unable to login, try trimming your password to the first 16 characters.") | |
} | |
response <- POST("https://wwws.mint.com/loginUserSubmit.xevent", body=list(username=username, password=password, task="L")) | |
#hackish solution due to non-public API. Looks like the only difference to the request is the URL. | |
if (response$url == "https://wwws.mint.com/app/internalError.html"){ | |
return(TRUE) | |
} | |
return(FALSE) | |
} | |
#download | |
downloadMintTransactions <- function(){ | |
response <- GET("https://wwws.mint.com/transactionDownload.event?") | |
if (response$url == "https://wwws.mint.com/accessDenied.html"){ | |
stop("It looks like you're not logged in, try using mintLogin() to authenticate before trying to download your transactions.") | |
} | |
transactions <- read.csv(text=(content(response, as="text"))) | |
transactions | |
} | |
#logout | |
mintLogout <- function(){ | |
GET("https://wwws.mint.com/login.event?task=L&messageId=5&country=US") | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prototype of how to read INI files to process olfactometer data | |
# efg, 13 June 2007 | |
# Thanks to Gabor Grothendieck for helpful suggestions in the R-Help | |
# mailing list on how to parse the INI file. | |
parseINI <- function(INI.filename) | |
{ | |
connection <- file(INI.filename) | |
Lines <- readLines(connection) | |
close(connection) | |
Lines <- chartr("[]", "==", Lines) # change section headers | |
connection <- textConnection(Lines) | |
d <- read.table(connection, as.is = TRUE, sep = "=", fill = TRUE, quote="") | |
close(connection) | |
L <- d$V1 == "" # location of section breaks | |
d <- subset(transform(d, V3 = V2[which(L)[cumsum(L)]])[1:3], | |
V1 != "") | |
ToParse <- paste("INI.list$`", d$V3, "`$", d$V1, " <- \"", | |
d$V2, "\"", sep="") | |
INI.list <- list() | |
eval(parse(text=ToParse)) | |
#replace with logical values | |
INI.list <- rapply(INI.list, function(x){if(x=="TRUE"){return(TRUE)}; if(x=="FALSE"){return(FALSE)}; return(x)}, how="replace") | |
#replace numeric values | |
INI.list <- rapply(INI.list, function(x){if(grepl("^[[:digit:]]+$", x)){return(as.numeric(x))}; return(x)}, how="replace") | |
return(INI.list) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(lubridate) | |
library(stringr) | |
library(digest) | |
library(plyr) | |
trans <- read.csv("transactions/transactions.csv", header=TRUE) | |
trans[trans$Transaction.Type == "debit","Amount"] <- -trans[trans$Transaction.Type == "debit","Amount"] | |
trans$Date <- mdy(as.character(trans$Date)) | |
#exclude anything excluded from Mint, as it's probably old or irrelevant to the budgets | |
trans <- trans[trans$Category != "Exclude From Mint",] | |
#' Plot the monthly performance of a category. | |
#' @param data The transaction-level data | |
#' @param categoryName the name(s) of the categories | |
#' @param budget The amount budgeted for this category | |
#' @param negateAmounts whether or not to negate the amounts so that negative debits | |
#' will be plotted as positive numbers | |
plotMonthlyCategory <- function(data, categoryName, budget, negateAmounts = TRUE){ | |
if (!missing(categoryName)){ | |
if (typeof(categoryName) == "character"){ | |
thisCat <- data[data$Category %in% categoryName,] | |
friendlyCatName <- paste(categoryName, collapse="/") | |
} | |
if (typeof(categoryName) == "list"){ | |
catNames <- c(names(categoryName), unlist(categoryName, use.names=FALSE)) | |
thisCat <- data[data$Category %in% catNames,] | |
friendlyCatName <- names(categoryName) | |
} | |
} else{ | |
thisCat <- data | |
} | |
if (negateAmounts){ | |
thisCat$Amount <- -thisCat$Amount | |
} | |
thisCat$Month <- year(thisCat$Date) * 12 + month(thisCat$Date) | |
monthText <- data.frame(Date=ymd("2001-01-01"), Amount=0, Color=0) | |
monthText <- monthText[-1,] | |
allMonths <- NULL | |
for (month in unique(thisCat$Month)){ | |
thisMonth <- thisCat[thisCat$Month == month,] | |
thisMonth <- thisMonth[order(thisMonth$Date),] | |
endMonth <- thisMonth[1,"Date"] | |
day(endMonth) <- 1 | |
month(endMonth) <- month(endMonth)+1 | |
thisMonthText <- list(Date=endMonth, Amount=sum(thisMonth$Amount),Color="Under") | |
levels(thisMonthText$Color) <- c("Under", "Over") | |
if (!missing(budget) & budget < sum(thisMonth$Amount)){ | |
thisMonthText$Color <- "Over" | |
} | |
monthText <- rbind(monthText, thisMonthText) | |
#rbind messes up our classes, we'll have to manually recover the date column | |
class(monthText$Date) <- c("POSIXct", "POSIXt") | |
#Add the first day of the month at 0. | |
thisMonth <- rbind(thisMonth[1,], thisMonth) | |
thisMonth$Amount[1] <- 0 | |
day(thisMonth$Date[1]) <- 1 | |
thisMonth$Amount <- cumsum(thisMonth$Amount) | |
#Add the last day of the month, if it's not the last month we have | |
if (month != max(unique(thisCat$Month))){ | |
thisMonth <- rbind(thisMonth, thisMonth[nrow(thisMonth),]) | |
day(thisMonth[nrow(thisMonth),"Date"]) <- days_in_month(month(thisMonth[nrow(thisMonth),"Date"])) | |
} | |
color <- rep(1, nrow(thisMonth)) | |
if (!missing(budget)){ | |
color <- ifelse(budget < thisMonth$Amount, "Over", "Under") | |
} | |
thisMonth$Budget <- color | |
allMonths <- rbind(allMonths, thisMonth) | |
} | |
p <- ggplot(allMonths, aes(Date, ymin=0, y=Amount, ymax=Amount)) + geom_point() + geom_ribbon() + | |
geom_text(data=monthText, aes(Date, Amount*1.04, label=Amount)) + | |
geom_hline(yintercept=0) | |
if (!missing(budget)){ | |
p <- p + geom_hline(yintercept=budget, col=2) | |
} | |
if (!is.null(friendlyCatName)){ | |
p <- p + ggtitle(friendlyCatName) | |
} | |
p | |
} | |
budg <- parseINI("budgets/singleNursing.ini") | |
#' Get the budget of the selected category from the vector of specified budgets. | |
#' @param budgetList a data structure who's top-level names correspond to the budget names | |
#' @param category The category or categories for which we desire to get the budgets. | |
#' If this is a character vector, all budgets with matching titles will be extracted. | |
#' If this is a list, all budgets matching any element in the list will be extracted | |
#' and summed before returning. | |
getBudget <- function(budgetList, category){ | |
if (typeof(category) == "character"){ | |
return(budgetList[category]) | |
} | |
if (typeof(category) == "list"){ | |
innerBudget <- budgetList[unlist(category, use.names=FALSE)] | |
#remove empty elements | |
innerBudget <- innerBudget[!is.na(names(innerBudget))] | |
budget <- list() | |
budget[[names(category)[1]]] <- innerBudget | |
#only process frequency if all inner budgets are on the same frequency. | |
if(all(sapply(budget[[1]], "[[", "frequency") == budget[[1]][[1]][["frequency"]])){ | |
budget$amount <- sum(sapply(budget[[1]], "[[", "amount")) | |
budget$frequency <- budget[[1]][[1]][["frequency"]] | |
} | |
return(budget) | |
} | |
} | |
#' Calculate the number of months in a given frequency of budget | |
#' @param frequency the frequency of the budget | |
#' @return the number of months in the specified budget | |
getMonths <- function(frequency=c("month", "biannual", "annual", "biennual")){ | |
frequency <- match.arg(frequency) | |
switch(frequency, | |
"month"=1, | |
"biannual"=6, | |
"annual"=12, | |
"biennual"=24) | |
} | |
computeBudget <- function (budgets, transactions, startDate=min(transactions$Date), endDate=max(transactions$Date)){ | |
transactions$month <- (year(transactions$Date)*12) + month(transactions$Date) | |
if (!missing(startDate)){ | |
minMonth <- (year(startDate) * 12) + month(startDate) | |
} else{ | |
minMonth <- min(transactions$month) | |
} | |
if (!missing(endDate)){ | |
maxMonth <- (year(endDate) * 12) + month(endDate) | |
} else{ | |
maxMonth <- max(transactions$month) | |
} | |
#process each month independently | |
for (i in minMonth:maxMonth){ | |
#extract the transacations that occured in this month | |
curMonth <- i %% 12 | |
if(curMonth == 0){ | |
curMonth <- 12 | |
} | |
curYear <- floor((i-1)/12) | |
#process each transaction in its category and potentially in parent category's budget | |
monthTrans <- transactions[month(transactions$Date) == curMonth & year(transactions$Date) == curYear,] | |
#process each budget separately | |
for (b in names(budgets)){ | |
bud <- budgets[[b]] | |
if(is.null(bud$transactions)){ | |
bud$transactions <- data.frame(list(Date=mdy(paste(curMonth, 1, curYear, sep="-"), quiet=TRUE), | |
Amount = bud$amount, | |
Transaction = "allowance")) | |
} else{ | |
#only distribute allowance or reset balance on appropriate months based on | |
#the budget's frequency. | |
if ((i - minMonth) %% getMonths(bud$frequency) == 0){ | |
if (bud$rollover){ | |
amt <- bud$amount + bud$transactions$Amount[nrow(bud$transactions)] | |
} else{ | |
amt <- bud$amount | |
} | |
bud$transactions <- rbind(bud$transactions, | |
data.frame(list(Date=mdy(paste(curMonth, 1, curYear, sep="-"), quiet=TRUE), | |
Amount = amt, | |
Transaction = "allowance"))) | |
} | |
} | |
#Find this category in the list of categories so we can determine if we | |
# need to include the parent category. | |
cats <- findCategory(categories, b) | |
if (names(cats) == b){ | |
#This is the parent category | |
cats <- unlist(cats) | |
} | |
else { | |
#This is a child category | |
cats <- c(names(cats), b) | |
} | |
thisBudTrans <- monthTrans[monthTrans$Category %in% cats,] | |
#order by date | |
thisBudTrans <- thisBudTrans[order(thisBudTrans$Date),] | |
bud$transactions <- rbind(bud$transactions, | |
data.frame(list(Date=thisBudTrans$Date, | |
Amount=bud$transactions$Amount[nrow(bud$transactions)] + cumsum(thisBudTrans$Amount), | |
Transaction=apply(thisBudTrans, 1, digest, algo="sha256")))) | |
budgets[[b]] <- bud | |
} | |
} | |
budgets | |
} | |
#' Plot a grid of the budgets specified. Defaults to all budgets | |
#' @param budgets the transaction-annotate list of budgets as output by the computeBudgets() function. | |
#' @param budgetNames the names of the budgets to include in the grid | |
plotBudgets <- function(budgets, budgetNames=names(budgets)){ | |
#get all transactions into a single data.frame | |
buds <- lapply(budgets, "[[", "transactions") | |
allTrans <- NULL | |
for (b in budgetNames){ | |
thisBudget <- buds[[b]] | |
thisBudget$Budget <- b | |
allTrans <- rbind(allTrans, thisBudget) | |
} | |
ggplot(allTrans, aes(Date, Amount)) + facet_wrap(~Budget, scales="free_y") + | |
geom_line() + | |
geom_point(data=allTrans[allTrans$Amount >= 0,]) + | |
geom_point(data=allTrans[allTrans$Amount < 0,], color="red") + | |
geom_hline(aes(yintercept=0)) | |
} | |
#' NOTE: Only works on monthly budgets | |
calculateRemainder <- function(budgets, | |
startMonth=year(now())*12 + month(now()), | |
endMonth=year(now())*12 + month(now())){ | |
#FIXME: currently only works on monthly budgets | |
#grab the budgets which are reset each month -- i.e. any leftover would go to savings | |
saveable <- names(which(!sapply(budgets,"[[","rollover") & sapply(budgets,"[[","frequency") == "month")) | |
remainders <- array(dim=c(endMonth-startMonth+1, | |
length(saveable)), | |
dimnames=list( | |
Month=as.character(startMonth:endMonth), | |
Budget=saveable)) | |
for (b in saveable){ | |
bud <- budgets[[b]] | |
tranDate <- year(bud$transactions$Date)*12 + month(bud$transactions$Date) | |
budTran <- bud$transactions[tranDate >= startMonth & tranDate <= endMonth,] | |
finalInd <- c(which(budTran$Transaction == "allowance")-1, nrow(budTran)) | |
finalBal <- budTran[finalInd,"Amount"] | |
remainders[,b] <- finalBal | |
} | |
remainders | |
} | |
budgets <- computeBudget(budg, trans) | |
remainders <- calculateRemainder(budgets, startMonth=2012*12+6, endMonth=2012*12+12) | |
ad <- adply(remainders, 1:2) | |
ggplot(ad, aes(x=factor(Month), y=V1, fill=Budget)) + geom_bar(stat="identity", position="dodge") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment