Last active
March 27, 2023 03:37
-
-
Save Enquier/241401089eccd257b8e6f0002b962e8f to your computer and use it in GitHub Desktop.
Chase Amazon Credit Card Statement Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# chase_amazon_parse.py -- Convert Chase credit card statements from PDF to CSV. Written
# to easily import older statements into any financial software that supports CSV import.
# Most importantly, it parses your Amazon order numbers so you can compare them with Amazon.com
# for budgeting and planning. This is a Python port of the most excellent chasepdf2csv by ivy evans | |
# | |
# How to Use | |
# ---------- | |
# This script requires Python 3.6 or newer (it uses f-strings) and pdftotext.
# | |
# ISC License | |
# ----------- | |
# Copyright 2023 - Enquier | |
# | |
# Permission to use, copy, modify, and/or distribute this software for any | |
# purpose with or without fee is hereby granted, provided that the above | |
# copyright notice and this permission notice appear in all copies. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | |
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | |
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
# PERFORMANCE OF THIS SOFTWARE. | |
import csv | |
import subprocess | |
import re | |
import sys | |
from optparse import OptionParser | |
def error(msg):
    """Write *msg* to standard error as a single ``error: ...`` line."""
    print("error: %s" % msg, file=sys.stderr)
def fatal(msg):
    """Report *msg* via error() and then terminate with exit status 1."""
    error(msg)
    # Raising SystemExit directly is exactly what sys.exit() does.
    raise SystemExit(1)
class Statement:
    """The line items extracted from one Chase credit card statement PDF.

    Create one with ``Statement.parse(path)`` and walk the transactions with
    ``each_line_item()``.
    """

    # Matches e.g. "Payment Due Date: 01/23/23". The month and two-digit year
    # are used to give each line item (which only carries MM/DD) a year.
    DUE_DATE_PATTERN = re.compile(
        r"Payment\s+Due\s+Date:?\s+(?P<month>\d{2})/(?P<day>\d{2})/(?P<year>\d{2})"
    )

    class Transaction:
        """A single line item matched by LINE_ITEM_PATTERN.

        All fields are kept as the strings captured from the statement text;
        ``points`` and ``order_num`` are None when the line has none.
        """

        # Regex for matching transactions in a Chase credit statement.
        #
        # Edge Case: Amazon orders
        #
        # 01/23 AMAZON MKTPLACE PMTS AMZN.COM/BILL WA 12.34\n
        # Order Number 123-4567890-1234567\n
        #
        # Edge Case: Rewards points
        #
        # 01/23 AMAZON MARKETPLACE AMZN.COM/BILLWA 4.56 7,890
        #
        LINE_ITEM_PATTERN = re.compile(
            r"(?P<date>\d{2}/\d{2})\s+(?P<description>.+)\s+(?P<amount>-?[\d,]+\.\d{2})"
            r"([ ](?P<points>[1-9][\d,]+)?|\s*Order\s+Number\s+(?P<order_num>[^\s]+))?"
        )

        @classmethod
        def scan(cls, output, year, due_month=None):
            """Return a Transaction for every line item found in *output*.

            Args:
                output: Statement text to search.
                year: Year of the statement's payment due date (as it should
                    appear in the output, e.g. "23").
                due_month: Optional month ("01".."12") of the payment due
                    date. When given, line items whose month is later than
                    the due month are dated in the previous year.
            """
            return [
                cls(match, year, due_month)
                for match in cls.LINE_ITEM_PATTERN.finditer(output)
            ]

        def __init__(self, data, year, due_month=None):
            """Build a transaction from a regex match (or equivalent mapping).

            *data* must provide the "date", "description", "amount", "points"
            and "order_num" keys. See scan() for *year* and *due_month*.
            """
            resolved_year = self._resolve_year(data["date"], year, due_month)
            self.date = f"{data['date']}/{resolved_year}"
            self.description = data["description"]
            self.amount = data["amount"]
            self.points = data["points"]
            self.order_num = data["order_num"]

        @staticmethod
        def _resolve_year(date, year, due_month):
            """Pick the year for a MM/DD *date* on a statement due in *due_month*.

            Line items precede the payment due date, so a transaction month
            later than the due month (e.g. a 12/28 purchase on a statement due
            in January) belongs to the previous year. Without *due_month* the
            given *year* is returned untouched.
            """
            if due_month is None or int(date[:2]) <= int(due_month):
                return year
            year_text = str(year)
            previous = int(year_text) - 1
            if len(year_text) <= 2:
                # Two-digit years wrap around the century: "00" -> "99".
                previous %= 100
            return str(previous).zfill(len(year_text))

        def __str__(self):
            return f"{self.date} - {self.description}: {self.amount}"

        def __repr__(self):
            return str(self)

        def description_with_order_num(self):
            """Return the description, suffixed with " #<order number>" if any."""
            if self.order_num:
                return f"{self.description} #{self.order_num}"
            return self.description

        def to_dict(self):
            """Return the transaction as a plain dict of its output fields."""
            return {
                "date": self.date,
                "description": self.description_with_order_num(),
                "amount": self.amount,
                "points": self.points,
                "order_num": self.order_num,
            }

    def __init__(self, line_items):
        """Wrap an already-parsed list of Transaction objects."""
        self.line_items = line_items

    @classmethod
    def parse(cls, path):
        """Convert the PDF at *path* to text with pdftotext and parse it.

        Exits through fatal() when no payment due date can be found in the
        text. ``subprocess.check_output`` raises CalledProcessError if
        pdftotext exits with a non-zero status.
        """
        output = subprocess.check_output(["pdftotext", "-raw", path, "-"]).decode("utf-8")
        m = cls.DUE_DATE_PATTERN.search(output)
        if not m:
            fatal(f"parse error: could not match due date in {path}")
        # Pass the due month along so line items from the end of the previous
        # year are not stamped with the due date's year.
        return cls(cls.Transaction.scan(output, m["year"], due_month=m["month"]))

    def each_line_item(self):
        """Yield every Transaction in statement order."""
        yield from self.line_items
def main(args=None):
    """Command-line entry point: convert statement PDFs to CSV/TSV rows.

    Args:
        args: Argument list to parse (options followed by one or more PDF
            paths). Defaults to ``sys.argv[1:]``.

    Writes a header row followed by one row per line item to the file named
    by ``-o/--output``, or to standard output when that option is absent.
    Exits through fatal() when pdftotext cannot be run, when no input files
    are given, or when the output file cannot be opened.
    """
    if args is None:
        args = sys.argv[1:]

    try:
        subprocess.check_output(["pdftotext", "-v"], stderr=subprocess.DEVNULL)
    except (OSError, subprocess.CalledProcessError):
        # A missing executable surfaces as FileNotFoundError (an OSError),
        # not CalledProcessError, so both must be handled here.
        fatal("pdftotext not found!")

    options = OptionParser(
        usage="%prog [options] FILE...", version="%prog 1.0",
    )
    options.add_option(
        "-o",
        "--output",
        dest="output_file",
        default=None,
        help="Output to file",
    )
    options.add_option(
        "-f",
        "--format",
        dest="format",
        default="csv",
        help="Output format: csv (default) or tsv",
    )
    opts, args = options.parse_args(args)
    if not args:
        fatal("no files specified!")

    outfile = sys.stdout
    if opts.output_file:
        try:
            # newline="" lets the csv module control line endings itself.
            outfile = open(opts.output_file, "w", newline="")
        except IOError as e:
            fatal(str(e))

    try:
        writer = csv.writer(outfile, delimiter="\t" if opts.format == "tsv" else ",")
        writer.writerow(
            [
                "Date",
                "Description",
                "Amount",
                "Points",
                "Order Number",
            ]
        )
        for path in args:
            statement = Statement.parse(path)
            for line_item in statement.each_line_item():
                writer.writerow(
                    [
                        line_item.date,
                        line_item.description_with_order_num(),
                        line_item.amount,
                        line_item.points or "",
                        line_item.order_num or "",
                    ]
                )
    finally:
        # Close only a file opened here; sys.stdout belongs to the caller.
        if outfile is not sys.stdout:
            outfile.close()
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment