Skip to content

Instantly share code, notes, and snippets.

@danielballan
Created September 19, 2012 15:48
Show Gist options
  • Save danielballan/3750403 to your computer and use it in GitHub Desktop.
Save danielballan/3750403 to your computer and use it in GitHub Desktop.
Corning Credit Union parser
"""
PURPOSE: Parse a pile of 'spreadsheet' files downloaded from
Corning Credit Union eBranch.
FEAUTURES: - Parse and combine financial records from a list of raw
files, converting the data into convenient types.
- Plot stuff.
EXAMPLE USAGE:
import ccu
checking = ccu.read_checking(ccu.list_files('Downloads/checking/'))
ccu.running_balance(checking) # It's a plot!
"""
import os
import re
import decimal as dec
import pandas as pd
import matplotlib.pyplot as plt
dec.getcontext().prec = 2 # a datatype for money (2 decimal places)
def cast_money(s):
pattern = '(\-?)\$?([1-9]+\.[0-9][0-9])'
try:
sign, stripped = re.search(pattern, s).group(1,2)
except AttributeError:
return s
if sign:
stripped = '-' + stripped
return dec.Decimal(stripped)
def read_visa(filenames):
"""Read CSV files downloaded from CCU. Parse them,
concatenate all their data,
and return a DataFrame array sorted by date."""
if type(filenames) is str:
filenames = list(filenames) # a single-element list
df = pd.concat(
[pd.read_csv(filename,
names=['date', 'description',
'amount', 'fee',
'interest', 'principal',
'balance','placeholder'],
parse_dates=[0],
converters = {'amount': cast_money,
'fee': cast_money,
'interest': cast_money,
'principal': cast_money,
'balance': cast_money})
for filename in filenames])
del df['placeholder'] # hanlded CCU's trailing comma
return df.sort('date')
def read_checking(filenames):
"""Read CSV files downloaded from CCU. Parse them,
concatenate all their data,
and return a DataFrame array sorted by date."""
if type(filenames) is str:
filenames = list(filenames) # a single-element list
df = pd.concat(
[pd.read_csv(filename,
names=['date', 'description',
'amount', 'balance',
'placeholder'],
parse_dates=[0],
converters = {'amount': cast_money,
'balance': cast_money})
for filename in filenames])
del df['placeholder'] # hanlded CCU's trailing comma
return df.sort('date')
def running_balance(df):
"Plot 'balance' versus 'date'."
plt.plot(df['date'], df['balance'])
plt.gcf().autofmt_xdate()
plt.show()
def list_files(dir):
"List the absolute paths of the files in a directory."
return [os.path.join(dir, f) for f in os.listdir(dir)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment