Created
September 19, 2012 15:48
-
-
Save danielballan/3750403 to your computer and use it in GitHub Desktop.
Corning Credit Union parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
PURPOSE: Parse a pile of 'spreadsheet' files downloaded from | |
Corning Credit Union eBranch. | |
FEAUTURES: - Parse and combine financial records from a list of raw | |
files, converting the data into convenient types. | |
- Plot stuff. | |
EXAMPLE USAGE: | |
import ccu | |
checking = ccu.read_checking(ccu.list_files('Downloads/checking/')) | |
ccu.running_balance(checking) # It's a plot! | |
""" | |
import os | |
import re | |
import decimal as dec | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
dec.getcontext().prec = 2 # a datatype for money (2 decimal places) | |
def cast_money(s): | |
pattern = '(\-?)\$?([1-9]+\.[0-9][0-9])' | |
try: | |
sign, stripped = re.search(pattern, s).group(1,2) | |
except AttributeError: | |
return s | |
if sign: | |
stripped = '-' + stripped | |
return dec.Decimal(stripped) | |
def read_visa(filenames): | |
"""Read CSV files downloaded from CCU. Parse them, | |
concatenate all their data, | |
and return a DataFrame array sorted by date.""" | |
if type(filenames) is str: | |
filenames = list(filenames) # a single-element list | |
df = pd.concat( | |
[pd.read_csv(filename, | |
names=['date', 'description', | |
'amount', 'fee', | |
'interest', 'principal', | |
'balance','placeholder'], | |
parse_dates=[0], | |
converters = {'amount': cast_money, | |
'fee': cast_money, | |
'interest': cast_money, | |
'principal': cast_money, | |
'balance': cast_money}) | |
for filename in filenames]) | |
del df['placeholder'] # hanlded CCU's trailing comma | |
return df.sort('date') | |
def read_checking(filenames): | |
"""Read CSV files downloaded from CCU. Parse them, | |
concatenate all their data, | |
and return a DataFrame array sorted by date.""" | |
if type(filenames) is str: | |
filenames = list(filenames) # a single-element list | |
df = pd.concat( | |
[pd.read_csv(filename, | |
names=['date', 'description', | |
'amount', 'balance', | |
'placeholder'], | |
parse_dates=[0], | |
converters = {'amount': cast_money, | |
'balance': cast_money}) | |
for filename in filenames]) | |
del df['placeholder'] # hanlded CCU's trailing comma | |
return df.sort('date') | |
def running_balance(df): | |
"Plot 'balance' versus 'date'." | |
plt.plot(df['date'], df['balance']) | |
plt.gcf().autofmt_xdate() | |
plt.show() | |
def list_files(dir): | |
"List the absolute paths of the files in a directory." | |
return [os.path.join(dir, f) for f in os.listdir(dir)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment