Skip to content

Instantly share code, notes, and snippets.

@Enquier
Last active March 27, 2023 03:37
Show Gist options
  • Save Enquier/241401089eccd257b8e6f0002b962e8f to your computer and use it in GitHub Desktop.
Save Enquier/241401089eccd257b8e6f0002b962e8f to your computer and use it in GitHub Desktop.
Chase Amazon Credit Card Statement Parser
# chase_amazon_parse.py -- Convert Chase credit card statements from PDF to CSV. Written
# to easily import older statements into any financial software that supports CSV import.
# Most importantly, it parses your Amazon order numbers so you can compare them with Amazon.com
# for budgeting and planning. This is a Python port of the most excellent chasepdf2csv by ivy evans.
#
# How to Use
# ----------
# This script requires Python 3.6 or newer (it uses f-strings) and the pdftotext command-line tool.
#
# ISC License
# -----------
# Copyright 2023 - Enquier
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
import csv
import subprocess
import re
import sys
from optparse import OptionParser
def error(msg):
    """Write *msg* to standard error, prefixed with ``error: ``.

    Args:
        msg: The message to report. Any object is accepted and rendered
            with ``str()``.
    """
    # An f-string is used instead of ``"%s" % msg`` because %-formatting
    # treats a tuple argument as the argument list and raises TypeError (or
    # silently drops elements) when the message happens to be a tuple.
    sys.stderr.write(f"error: {msg}\n")
def fatal(msg):
    """Report *msg* through ``error`` and terminate with exit status 1.

    Args:
        msg: The message describing why the program cannot continue.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    error(msg)
    raise SystemExit(1)
class Statement:
    """The line items extracted from one Chase credit card statement PDF."""

    # Locates the payment due date, e.g. "Payment Due Date: 01/23/23". Line
    # items only carry MM/DD, so the year (and month) captured here are what
    # turn them into full dates.
    DUE_DATE_PATTERN = re.compile(
        r"Payment\s+Due\s+Date:?\s+(?P<month>\d{2})/(?P<day>\d{2})/(?P<year>\d{2})"
    )

    class Transaction:
        """A single line item: date, description, amount and optional extras."""

        # Regex for matching transactions in a Chase credit statement.
        #
        # Edge Case: Amazon orders
        #
        # 01/23 AMAZON MKTPLACE PMTS AMZN.COM/BILL WA 12.34\n
        # Order Number 123-4567890-1234567\n
        #
        # Edge Case: Rewards points
        #
        # 01/23 AMAZON MARKETPLACE AMZN.COM/BILLWA 4.56 7,890
        #
        # The points group uses ``[\d,]*`` (not ``+``) after the leading
        # non-zero digit so that single-digit point values are captured too.
        LINE_ITEM_PATTERN = re.compile(
            r"(?P<date>\d{2}/\d{2})\s+(?P<description>.+)\s+(?P<amount>-?[\d,]+\.\d{2})"
            r"([ ](?P<points>[1-9][\d,]*)?|\s*Order\s+Number\s+(?P<order_num>[^\s]+))?"
        )

        @classmethod
        def scan(cls, output, year, due_month=None):
            """Return a Transaction for every line item found in *output*.

            Args:
                output: Text of the statement.
                year: Year string appended to each MM/DD date.
                due_month: Optional month (1-12, str or int) of the payment
                    due date. When given, line items whose month is later
                    than it are dated in the year before *year*.
            """
            return [
                cls(match, year, due_month)
                for match in cls.LINE_ITEM_PATTERN.finditer(output)
            ]

        def __init__(self, data, year, due_month=None):
            """Build a transaction from a LINE_ITEM_PATTERN match.

            Args:
                data: Match (or mapping) providing ``date``, ``description``,
                    ``amount``, ``points`` and ``order_num``.
                year: Year string appended to the MM/DD date.
                due_month: Optional month of the payment due date; see
                    ``scan`` for how it adjusts the year.
            """
            # Everything on a statement happened before its due date, so a
            # line item in a later month than the due date (e.g. a December
            # purchase on a statement due in January) is from the prior year.
            if due_month is not None and int(data["date"][:2]) > int(due_month):
                year = self._previous_year(year)
            self.date = f"{data['date']}/{year}"
            self.description = data["description"]
            self.amount = data["amount"]
            self.points = data["points"]
            self.order_num = data["order_num"]

        @staticmethod
        def _previous_year(year):
            """Return the year before *year*, keeping its digit count ("00" -> "99")."""
            text = str(year)
            return str((int(text) - 1) % 10 ** len(text)).zfill(len(text))

        def __str__(self):
            return f"{self.date} - {self.description}: {self.amount}"

        def __repr__(self):
            return str(self)

        def description_with_order_num(self):
            """Return the description, suffixed with ``#<order number>`` if one was parsed."""
            if self.order_num:
                return f"{self.description} #{self.order_num}"
            return self.description

        def to_dict(self):
            """Return the transaction as a plain dict of its fields."""
            return {
                "date": self.date,
                "description": self.description_with_order_num(),
                "amount": self.amount,
                "points": self.points,
                "order_num": self.order_num,
            }

    def __init__(self, line_items):
        """Wrap an already-parsed list of Transaction objects."""
        self.line_items = line_items

    @classmethod
    def parse(cls, path):
        """Extract the text of the PDF at *path* and parse its line items.

        Runs ``pdftotext -raw <path> -`` and scans its standard output.
        Terminates the program through ``fatal`` when the text cannot be
        extracted or no payment due date is found in it.
        """
        try:
            raw = subprocess.check_output(["pdftotext", "-raw", path, "-"])
        except (OSError, subprocess.CalledProcessError) as exc:
            # Report extraction failures the same way as every other error
            # in this script instead of letting a traceback escape.
            fatal(f"could not extract text from {path}: {exc}")
        output = raw.decode("utf-8")
        m = cls.DUE_DATE_PATTERN.search(output)
        if not m:
            fatal(f"parse error: could not match due date in {path}")
        return cls(cls.Transaction.scan(output, m["year"], m["month"]))

    def each_line_item(self):
        """Yield the statement's transactions in the order they were parsed."""
        yield from self.line_items
def main(args=None):
    """Command-line entry point: write the line items of statement PDFs as CSV/TSV.

    Args:
        args: Command-line arguments to parse; defaults to ``sys.argv[1:]``.

    Terminates through ``fatal`` (exit status 1) when pdftotext cannot be
    run, when no input files are given, or when the output file cannot be
    opened.
    """
    if args is None:
        args = sys.argv[1:]
    try:
        subprocess.check_output(["pdftotext", "-v"], stderr=subprocess.DEVNULL)
    except (OSError, subprocess.CalledProcessError):
        # A missing executable is reported as OSError (FileNotFoundError),
        # not CalledProcessError, so both must be caught for the message
        # below to ever be shown.
        fatal("pdftotext not found!")
    options = OptionParser(
        usage="%prog [options] FILE...", version="%prog 1.0",
    )
    options.add_option(
        "-o",
        "--output",
        dest="output_file",
        default=None,
        help="Output to file",
    )
    options.add_option(
        "-f",
        "--format",
        dest="format",
        default="csv",
        help="Output format: csv (default) or tsv",
    )
    opts, args = options.parse_args(args)
    if not args:
        fatal("no files specified!")
    opened = None
    if opts.output_file:
        try:
            # newline="" lets the csv writer control line endings itself;
            # without it, platforms that translate "\n" emit blank rows.
            opened = open(opts.output_file, "w", newline="")
        except OSError as e:
            fatal(str(e))
    outfile = sys.stdout if opened is None else opened
    try:
        writer = csv.writer(outfile, delimiter="\t" if opts.format == "tsv" else ",")
        writer.writerow(
            [
                "Date",
                "Description",
                "Amount",
                "Points",
                "Order Number",
            ]
        )
        for path in args:
            statement = Statement.parse(path)
            for line_item in statement.each_line_item():
                writer.writerow(
                    [
                        line_item.date,
                        line_item.description_with_order_num(),
                        line_item.amount,
                        line_item.points or "",
                        line_item.order_num or "",
                    ]
                )
    finally:
        # Close only a file opened here, even on error; sys.stdout belongs
        # to the interpreter and must stay open for the caller.
        if opened is not None:
            opened.close()
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment