Skip to content

Instantly share code, notes, and snippets.

@Attumm
Created January 25, 2019 16:22
Show Gist options
  • Select an option

  • Save Attumm/825a6bc90fdb8d1c3de9a6eccbd8f09a to your computer and use it in GitHub Desktop.

Select an option

Save Attumm/825a6bc90fdb8d1c3de9a6eccbd8f09a to your computer and use it in GitHub Desktop.
parse date
======== parsing pdf =======
woensdag 2 januari 2019
dinsdag 31 december 2019
vrijdag 31 december 2021
vrijdag 31 december 2021
donderdag 6 december 2018 Om 23:59
vrijdag 7 december 2018 Vóór 23:59
maandag 10 december 2018 Vóór 23:59
woensdag 12 december 2018 Vóór 23:59
maandag 31 december 2018
======== parsing pdf =======
maandag 7 januari 2019
dinsdag 30 april 2019
donderdag 30 april 2020
donderdag 6 december 2018 Om 23:59
vrijdag 7 december 2018 Vóór 23:59
maandag 10 december 2018 Vóór 23:59
woensdag 12 december 2018 Vóór 23:59
maandag 31 december 2018
======== parsing pdf =======
dinsdag 1 januari 2019
donderdag 28 februari 2019
donderdag 31 december 2020
donderdag 31 december 2020
donderdag 6 december 2018 Om 23:59
vrijdag 7 december 2018 Vóór 23:59
maandag 10 december 2018 Vóór 23:59
woensdag 12 december 2018 Vóór 23:59
maandag 31 december 2018
======== parsing pdf =======
maandag 11 februari 2019
dinsdag 31 december 2019
zaterdag 31 december 2022
donderdag 17 januari 2019 Om 23:59
vrijdag 18 januari 2019 Vóór 23:59
maandag 21 januari 2019 Vóór 23:59
woensdag 23 januari 2019 Vóór 23:59
donderdag 7 februari 2019
======== parsing pdf =======
maandag 11 februari 2019
dinsdag 31 december 2019
vrijdag 31 december 2021
vrijdag 31 december 2021
donderdag 17 januari 2019 Om 23:59
vrijdag 18 januari 2019 Vóór 23:59
maandag 21 januari 2019 Vóór 23:59
donderdag 24 donderdag 2019 Vóór 23:59
donderdag 7 februari 2019
import sys
import datetime
class UnableToParse(Exception):
    """Signals that a date field could not be extracted from a tokenised line."""
# Dutch weekday name -> weekday number, counting from zondag (Sunday) = 0.
trans_day = dict(
    zip(
        (
            'zondag',
            'maandag',
            'dinsdag',
            'woensdag',
            'donderdag',
            'vrijdag',
            'zaterdag',
        ),
        range(7),
    )
)
# Dutch month name -> month number (januari = 1 ... december = 12).
trans_month = {
    month_name: month_number
    for month_number, month_name in enumerate(
        (
            'januari',
            'februari',
            'maart',
            'april',
            'mei',
            'juni',
            'juli',
            'augustus',
            'september',
            'oktober',
            'november',
            'december',
        ),
        start=1,
    )
}
def get_day_name(raw_item):
    """Return the first token of ``raw_item`` (the day-name position) as-is.

    Raises:
        UnableToParse: if ``raw_item`` has no first element.
    """
    try:
        first_token = raw_item[0]
    except IndexError:
        raise UnableToParse
    else:
        return first_token
def get_day_week(raw_item):
    """Return the weekday number for the day name in the first token.

    The number is looked up in the module-level ``trans_day`` table.

    Raises:
        UnableToParse: if ``raw_item`` is empty or its first token is not a
            key of ``trans_day``.
    """
    try:
        return trans_day[raw_item[0]]
    except (IndexError, KeyError):
        # IndexError: no tokens at all; KeyError: unknown day name.
        # Both are reported the same way as in the other field getters.
        raise UnableToParse
def get_day(raw_item):
    """Return the second token of ``raw_item`` converted to an int.

    Raises:
        UnableToParse: if there is no second token or ``int()`` rejects it.
    """
    try:
        day_token = raw_item[1]
        day_number = int(day_token)
    except (ValueError, IndexError):
        raise UnableToParse
    return day_number
def get_month_name(raw_item):
    """Return the third token of ``raw_item`` (the month-name position) as-is.

    Raises:
        UnableToParse: if ``raw_item`` has fewer than three elements.
    """
    try:
        third_token = raw_item[2]
    except IndexError:
        raise UnableToParse
    else:
        return third_token
def get_month(raw_item):
    """Return the month number for the month name in the third token.

    The number is looked up in the module-level ``trans_month`` table.

    Raises:
        UnableToParse: if ``raw_item`` has fewer than three tokens or the
            third token is not a key of ``trans_month``.
    """
    try:
        return trans_month[raw_item[2]]
    except (IndexError, KeyError):
        # IndexError: line too short; KeyError: unknown month name.
        # Both are reported the same way as in the other field getters.
        raise UnableToParse
def get_year(raw_item):
    """Return the fourth token of ``raw_item`` converted to an int.

    Raises:
        UnableToParse: if there is no fourth token or ``int()`` rejects it.
    """
    try:
        year_token = raw_item[3]
        year_number = int(year_token)
    except (ValueError, IndexError):
        raise UnableToParse
    return year_number
def parse_item(line):
    """Split ``line`` on whitespace and extract the date fields from its tokens.

    Returns:
        A dict with the keys ``day_name``, ``day_week``, ``day``,
        ``month_name``, ``month`` and ``year`` (in that order), or ``None``
        when any field getter raises ``UnableToParse``.
    """
    tokens = line.split()
    # Field name -> getter, applied in this order to the same token list.
    field_getters = (
        ('day_name', get_day_name),
        ('day_week', get_day_week),
        ('day', get_day),
        ('month_name', get_month_name),
        ('month', get_month),
        ('year', get_year),
    )
    parsed = {}
    try:
        for field, getter in field_getters:
            parsed[field] = getter(tokens)
    except UnableToParse:
        return None
    return parsed
def valid_lines(filename):
    """Yield the lines of ``filename`` that are candidates for date parsing.

    A line is skipped when it is shorter than 10 characters (the trailing
    newline counts towards the length) or when it contains ``'===='`` or
    ``'parsing'``. Every other line is yielded unchanged, including its
    newline.

    The file is opened with a context manager so the handle is closed once
    the generator is exhausted or closed, instead of being left open.
    """
    with open(filename) as source:
        for line in source:
            if len(line) < 10:
                continue
            if '====' in line or 'parsing' in line:
                continue
            yield line
# validation not yet used
def valid_day_of_month(i):
    """Return True when the day number in ``i`` can exist in its month.

    Args:
        i: a dict shaped like the result of ``parse_item``. The day is read
            from ``i['day']``; the older ``'number'`` key is still accepted
            when ``'day'`` is absent. ``'month'`` and ``'year'`` are optional.

    Returns:
        False for a day below 1. When both month and year are present, the
        day is checked against the real length of that month (leap years
        included) and an out-of-range month yields False. Without them, only
        the generic ``day < 32`` bound is applied.
    """
    import calendar

    # parse_item stores the day under 'day'; 'number' is kept for callers
    # that still build the dict the old way.
    day = i['day'] if 'day' in i else i['number']
    if day < 1:
        return False

    month = i.get('month')
    year = i.get('year')
    if month is None or year is None:
        return day < 32
    if not 1 <= month <= 12:
        return False
    # monthrange returns (weekday of the 1st, number of days in the month).
    return day <= calendar.monthrange(year, month)[1]
if __name__ == '__main__':
    # Input file: the argument following "-f", or dates.txt when the flag
    # is not given.
    if '-f' in sys.argv:
        flag_pos = sys.argv.index('-f')
        if flag_pos + 1 >= len(sys.argv):
            # "-f" was the last argument; exit with a message instead of an
            # IndexError traceback.
            sys.exit('-f requires a filename')
        filename = sys.argv[flag_pos + 1]
    else:
        filename = 'dates.txt'

    for line in valid_lines(filename):
        item = parse_item(line)
        date = None
        if item is not None:
            try:
                date = datetime.datetime(year=item['year'], month=item['month'], day=item['day'])
            except ValueError:
                # The tokens parsed but do not form a real calendar date
                # (e.g. day 31 in a 30-day month); report the line as
                # unparseable rather than aborting the whole run.
                date = None
        if date is None:
            # Unparseable lines are echoed back unchanged.
            print(line)
        else:
            print(date.isoformat())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment