Last active
October 18, 2017 13:55
-
-
Save lvisintini/44ce7386c24284fd0a0a77307d6727f7 to your computer and use it in GitHub Desktop.
Format Bank Statement dates to use isoformat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import re | |
import datetime | |
from calendar import monthrange | |
from collections import OrderedDict | |
from dateutil.relativedelta import relativedelta | |
# Root directory that main() walks recursively looking for statement files.
PATH = "/home/lvisintini/Downloads/statement"
# Regex fragments used to recognise the date parts of a statement file name.
YEAR_REG = r'[0-9]{4}'
MONTH_REG = r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
DAY_REG = r'[0-9]{2}'


class DateParser(object):
    """Parse the date range encoded in a bank statement file name.

    Three layouts are recognised:

    * ``F1``: ``"DD Mon - DD Mon YYYY"`` (the start date carries no year)
    * ``F2``: ``"DD Mon YYYY - DD Mon YYYY"``
    * ``F3``: ``"Annual Summary - DD Mon YYYY - DD Mon YYYY"``
    """

    F1 = re.compile(f'^{DAY_REG} {MONTH_REG} - {DAY_REG} {MONTH_REG} {YEAR_REG}$')
    F2 = re.compile(f'^{DAY_REG} {MONTH_REG} {YEAR_REG} - {DAY_REG} {MONTH_REG} {YEAR_REG}$')
    F3 = re.compile(f'^Annual Summary - {DAY_REG} {MONTH_REG} {YEAR_REG} - {DAY_REG} {MONTH_REG} {YEAR_REG}$')

    # strptime layout shared by every fully specified date.
    _DATE_FORMAT = '%d %b %Y'

    @classmethod
    def parse(cls, value):
        """Return ``(start, end)`` as ``datetime.date`` objects for *value*.

        Every known layout is tried in order (F1, F2, F3). Only when none of
        them matches is a message printed and ``(None, None)`` returned.
        """
        parsers = (
            (cls.F1, cls.parse_f1),
            (cls.F2, cls.parse_f2),
            (cls.F3, cls.parse_f3),
        )
        for regex, parser in parsers:
            if regex.match(value):
                return parser(value)
        print('Failed to parse date "{}" '.format(value))
        return None, None

    @staticmethod
    def _parse_sorted_range(value):
        """Parse ``"DD Mon YYYY - DD Mon YYYY"`` and return the dates in ascending order."""
        first, second = sorted(
            datetime.datetime.strptime(part, DateParser._DATE_FORMAT).date()
            for part in value.split(' - ')
        )
        return first, second

    @staticmethod
    def parse_f1(value):
        """Parse ``"DD Mon - DD Mon YYYY"``.

        The start date has no year of its own, so it is inferred from the end
        date: the end date's year is tried first, and the previous year is
        used when that would put the start after the end (a range that
        crosses New Year, e.g. ``"28 Dec - 27 Jan 2017"``) or when the day
        does not exist in the end date's year (29 Feb).

        Raises:
            ValueError: if no valid start date on or before the end date
                can be built.
        """
        start_text, end_text = value.split(' - ')
        end = datetime.datetime.strptime(end_text, DateParser._DATE_FORMAT).date()
        for year in (end.year, end.year - 1):
            try:
                start = datetime.datetime.strptime(
                    '{} {}'.format(start_text, year), DateParser._DATE_FORMAT
                ).date()
            except ValueError:
                # Day/month combination is not valid in this year (29 Feb).
                continue
            if start <= end:
                return start, end
        raise ValueError('Cannot infer the start year for "{}"'.format(value))

    @staticmethod
    def parse_f2(value):
        """Parse ``"DD Mon YYYY - DD Mon YYYY"`` into an ascending ``(start, end)`` pair."""
        return DateParser._parse_sorted_range(value)

    @staticmethod
    def parse_f3(value):
        """Parse ``"Annual Summary - DD Mon YYYY - DD Mon YYYY"`` into ``(start, end)``."""
        return DateParser._parse_sorted_range(value.replace('Annual Summary - ', ''))
def main(path=None):
    """Copy every recognised statement file to a sibling with an ISO-dated name.

    Walks *path* recursively (defaults to the module-level ``PATH``). For each
    file whose name, without its extension, is understood by
    ``DateParser.parse``, a copy named ``"<start> - <end><ext>"`` is created
    in the same directory, with `` - Annual Summary`` appended to the stem
    for annual summaries. Originals are left in place.

    Files whose names are not recognised are skipped, which also makes the
    function safe to run again over a directory that already holds copies.
    """
    root = PATH if path is None else path
    planned = {}
    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            # splitext copes with names that have no dot or several dots;
            # the returned extension keeps its leading dot (or is empty).
            stem, extension = os.path.splitext(filename)
            start, end = DateParser.parse(stem)
            if start is None or end is None:
                # parse() returns (None, None) for names it cannot handle.
                continue
            new_name = '{} - {}'.format(start.isoformat(), end.isoformat())
            if 'Annual Summary' in stem:
                new_name = '{} - Annual Summary'.format(new_name)
            source = os.path.join(dirpath, filename)
            planned[source] = os.path.join(dirpath, new_name + extension)

    # Copy only after the walk has finished so new files are not re-visited.
    for source, target in planned.items():
        shutil.copy2(source, target)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment