Created
September 13, 2013 15:39
-
-
Save philipbl/6552329 to your computer and use it in GitHub Desktop.
Parses time data into total hours. This can be used to parse time card information. See the grammar at the top of the file to see what is allowed. Example: 10:01 AM to 12:41 PM and 3:15 PM to 6:11 PM - 10 min + 1 hour
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
from datetime import datetime | |
# <COMPOUND_STATEMENT> : <STATEMENT> (and <COMPOUND_STATEMENT>)* | |
# <STATEMENT> : <ABS_TIME> to <ABS_TIME> (<REL_OPERATOR> <REL_TIME>)* | |
# <REL_OPERATOR> : -, + | |
# <ABS_TIME> : XX:XX AM|PM | |
# <REL_TIME> : X+ min|hour|mins|hours | |
def convert_to_minutes(time):
    """Convert a relative-time string into a whole number of minutes.

    The string is first searched for a number followed by ``min``/``mins``.
    If that is not found, it is searched for a number followed by
    ``hour``/``hours``, and that number is multiplied by 60.

    Args:
        time: text such as ``"10 min"`` or ``"2 hours"``.

    Returns:
        The amount as an ``int`` number of minutes.

    Raises:
        ValueError: if neither a minute nor an hour amount is present.
    """
    # Minutes are tried before hours so that a string containing both keeps
    # resolving to the minute amount.
    units = (
        (r"(\d+)\s*(?:min|mins)", 1),
        (r"(\d+)\s*(?:hour|hours)", 60),
    )
    for pattern, minutes_per_unit in units:
        match = re.search(pattern, time)
        if match is not None:
            return int(match.group(1)) * minutes_per_unit

    # Previously this case crashed with AttributeError on a None match.
    raise ValueError(
        'Unable to read a relative time from "{}"'.format(time))
def format_time(time):
    """Normalize a 12-hour clock string to ``"H:MM AM"`` / ``"H:MM PM"``.

    Any whitespace (or none) between the digits and the AM/PM marker is
    replaced by exactly one space, giving a string that
    ``datetime.strptime(..., "%I:%M %p")`` can parse.

    Args:
        time: text containing a time such as ``"10:01AM"`` or ``"3:15 PM"``.

    Returns:
        The normalized time string.

    Raises:
        ValueError: if no valid 12-hour time is present in ``time``.
    """
    # The lookbehind stops the search from latching onto the tail of an
    # out-of-range hour (e.g. reading "13:45 PM" as "3:45 PM").
    match = re.search(
        r"(?<!\d)((?:0?[1-9]|1[0-2]):[0-5][0-9])\s*(AM|PM)", time)
    if match is None:
        # Previously this case crashed with AttributeError on a None match.
        raise ValueError('Unable to read a clock time from "{}"'.format(time))
    return "{} {}".format(match.group(1), match.group(2))
def calc_time_diff(t1, t2):
    """Return the number of minutes from clock time ``t1`` to ``t2``.

    Both arguments are normalized with ``format_time`` and parsed as
    12-hour times. The result is taken from the ``seconds`` field of the
    resulting ``timedelta``, which is never negative, so an end time that is
    earlier than the start time is measured as if it fell on the next day.
    """
    clock_format = "%I:%M %p"
    start, end = [
        datetime.strptime(format_time(stamp), clock_format)
        for stamp in (t1, t2)
    ]
    elapsed = end - start
    return elapsed.seconds / 60
def add_time(op, time):
    """Return the signed minute adjustment for one relative-time term.

    ``time`` is converted with ``convert_to_minutes``; the result is negated
    when ``op`` is ``'-'`` and returned unchanged for any other operator.
    """
    minutes = convert_to_minutes(time)
    # Only '-' subtracts; every other operator is treated as addition.
    return -1 * minutes if op == '-' else minutes
def process_tokens(tokens):
    """Evaluate a token list and return the total as fractional hours.

    Parsing starts at ``process_statement`` with a running total of zero
    minutes; the minute total it returns is divided by 60.0.
    """
    total_minutes = process_statement(tokens, 0)
    return total_minutes / 60.0
def process_statement(tokens, total_time):
    """Consume one ``<ABS_TIME> to <ABS_TIME>`` statement from ``tokens``.

    ``tokens`` is used as a stack: the next token to read is the last list
    element, and consumed tokens are popped off. The minutes between the two
    times are added to ``total_time`` and parsing continues with
    ``process_next_stament``.

    Args:
        tokens: remaining tokens, next token last.
        total_time: minutes accumulated so far.

    Returns:
        The total number of minutes once all tokens are consumed.

    Raises:
        ValueError: if fewer than three tokens remain or they do not form
            ``<ABS_TIME> to <ABS_TIME>``.
    """
    # Fail with a clear message instead of an IndexError from pop().
    if len(tokens) < 3:
        raise ValueError(
            "Incomplete statement: expected <ABS_TIME> to <ABS_TIME>")

    start = tokens.pop()
    op = tokens.pop()
    end = tokens.pop()

    # Validate all three tokens before doing any time arithmetic.
    if start[0] != "ABS_TIME":
        raise ValueError(
            "Expected a start time but found {!r}".format(start))
    if op != "TO":
        raise ValueError("Expected 'to' but found {!r}".format(op))
    if end[0] != "ABS_TIME":
        raise ValueError("Expected an end time but found {!r}".format(end))

    total_time += calc_time_diff(start[1], end[1])
    return process_next_stament(tokens, total_time)
def process_rel_statement(tokens, total_time):
    """Consume one ``<REL_OPERATOR> <REL_TIME>`` adjustment from ``tokens``.

    ``tokens`` is used as a stack: the next token to read is the last list
    element, and consumed tokens are popped off. The signed adjustment from
    ``add_time`` is added to ``total_time`` and parsing continues with
    ``process_next_stament``.

    Args:
        tokens: remaining tokens, next token last.
        total_time: minutes accumulated so far.

    Returns:
        The total number of minutes once all tokens are consumed.

    Raises:
        ValueError: if the operator is not followed by a ``REL_TIME`` token.
    """
    # Fail with a clear message instead of an IndexError from pop().
    if len(tokens) < 2:
        raise ValueError(
            "Incomplete adjustment: expected <REL_OPERATOR> <REL_TIME>")

    op = tokens.pop()
    amount = tokens.pop()

    if amount[0] != "REL_TIME":
        raise ValueError(
            "Expected a relative time after {!r} but found {!r}".format(
                op, amount))

    total_time += add_time(op[1], amount[1])
    return process_next_stament(tokens, total_time)
def process_next_stament(tokens, total_time):
    """Decide what follows a completed statement or adjustment.

    Looks at the next token (the last element of ``tokens``) without
    consuming it:

    * no tokens left: parsing is finished and ``total_time`` is returned;
    * a ``REL_OPERATOR`` token: hand over to ``process_rel_statement``;
    * ``'AND'``: consume it and parse another statement.

    Args:
        tokens: remaining tokens, next token last.
        total_time: minutes accumulated so far.

    Returns:
        The total number of minutes once all tokens are consumed.

    Raises:
        ValueError: if the next token cannot start an adjustment or a new
            statement.
    """
    if not tokens:
        return total_time

    upcoming = tokens[-1]
    if upcoming[0] == 'REL_OPERATOR':
        return process_rel_statement(tokens, total_time)
    if upcoming == 'AND':
        tokens.pop()
        return process_statement(tokens, total_time)

    raise ValueError(
        "Expected '+', '-' or 'and' but found {!r}".format(upcoming))
def tokenize(input_):
    """Split ``input_`` into parser tokens, returned in reverse order.

    Token shapes:

    * ``("ABS_TIME", text)`` for a clock time such as ``10:01 AM``;
    * ``("REL_TIME", text)`` for an amount such as ``10 min``;
    * ``("REL_OPERATOR", text)`` for ``-`` or ``+``;
    * the plain strings ``"TO"`` and ``"AND"`` for the two keywords.

    Whitespace between tokens is discarded. The list is reversed so that
    callers can read the next token with ``list.pop()``.

    Args:
        input_: the text to tokenize.

    Returns:
        The reversed list of tokens.

    Raises:
        ValueError: if some part of ``input_`` matches no token pattern.
    """
    def tagged(kind):
        # Build a scanner callback that labels the matched text with `kind`.
        def action(scanner, token):
            return (kind, token)
        return action

    def keyword(scanner, token):
        # Only "to" and "and" are routed here.
        return "TO" if token == "to" else "AND"

    # Groups are non-capturing: re.Scanner wraps every pattern in its own
    # numbered group to pick the action, so inner capturing groups would
    # share that numbering.
    scanner = re.Scanner([
        (r"(?:0?[1-9]|1[0-2]):[0-5][0-9]\s*?(?:AM|PM)", tagged("ABS_TIME")),
        (r"\d+\s*?(?:mins|min|hours|hour)", tagged("REL_TIME")),
        (r"to", keyword),
        (r"and", keyword),
        (r"-", tagged("REL_OPERATOR")),
        (r"\+", tagged("REL_OPERATOR")),
        (r"\s+", None),
    ])

    tokens, remainder = scanner.scan(input_)
    if remainder != '':
        raise ValueError('Error: Unable to tokenize "{}"'.format(remainder))

    tokens.reverse()
    return tokens
def run_parser(input_):
    """Parse a time-card expression and return the total as display text.

    Args:
        input_: text such as ``"10:01 AM to 12:41 PM - 10 min"``.

    Returns:
        A string of the form ``"<hours> hours"``, with the hours rounded to
        two decimal places and formatted with thousands separators.

    Raises:
        Exception: if tokenizing or evaluating the input fails; the message
            includes the underlying error.
    """
    try:
        tokens = tokenize(input_)
        hours = process_tokens(tokens)
    except Exception as err:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and keep the cause in the message
        # instead of discarding it.
        raise Exception(
            "An error occurred while running the parser: {}".format(err))
    return "{:,} hours".format(round(hours, 2))
# Script entry point: pick the input/output channel based on which modules
# can be imported.
try:
    # NOTE(review): `editor` and `workflow` are presumably modules supplied
    # by a host scripting app (not the standard library) - confirm. `editor`
    # is not used below; its import only takes part in the availability check.
    import editor
    import workflow
except ImportError:
    # Host modules unavailable: read the expression from stdin and print
    # the result.
    import sys
    input_ = sys.stdin.read()
    output_ = run_parser(input_)
    # Parenthesized single-argument print works on both Python 2 and 3.
    print(output_)
else:
    # Only the imports are guarded, so an ImportError raised while parsing
    # is no longer mistaken for "host modules missing".
    input_ = workflow.get_input()
    output_ = run_parser(input_)
    workflow.set_output(output_)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment