Skip to content

Instantly share code, notes, and snippets.

@technillogue
Last active June 25, 2019 13:16
Show Gist options
  • Save technillogue/05a6f2fef06fb53f3e59977ead882314 to your computer and use it in GitHub Desktop.
Save technillogue/05a6f2fef06fb53f3e59977ead882314 to your computer and use it in GitHub Desktop.
shift parsing
"""
This is the version my interviewer seemed to want me to write.
It uses a list of shifts using the 1900-1-1 offsets returned by strptime
"""
import csv
from datetime import datetime as dt
DAY_NAMES = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]


def find_shift_agents(csv_filename, search_dt):
    """Return the names of the agents who are on shift at ``search_dt``.

    Each csv row is ``name, shift_ranges`` where ``shift_ranges`` holds one
    or more ranges such as ``"Mon-Wed 5 pm - 12 am"`` separated by ``" / "``.
    A day range may wrap around the week (``"Sat-Mon"``) or name a single
    day (``"Wed"``).  Start and stop times are both inclusive.

    A stop time of ``12 am`` means the end of the starting day.  A stop time
    that is earlier than the start time (``"5 pm - 12:30 am"``) means the
    shift runs past midnight into the following day.

    Names are returned in csv row order.
    """
    # strptime fills in 1900-01-01 when the format has no date fields, so
    # every parsed time is an offset from this midnight.
    midnight = dt(1900, 1, 1)
    end_of_day = dt(1900, 1, 2)
    shifts = [[] for _ in DAY_NAMES]
    with open(csv_filename, newline="") as f:
        for name, shift_ranges in csv.reader(f):
            for shift_range in shift_ranges.split(" / "):
                # e.g. "Mon-Wed 5 pm - 12 am"
                day_range, time_range = shift_range.split(" ", 1)
                day_indexes = [
                    DAY_NAMES.index(day) for day in day_range.split("-")
                ]
                # a single day ("Wed") is a range that starts and stops on
                # the same day
                start_day, stop_day = day_indexes[0], day_indexes[-1]
                if stop_day < start_day:
                    days = [*range(stop_day + 1), *range(start_day, 7)]
                else:
                    days = range(start_day, stop_day + 1)
                start_time, stop_time = (
                    dt.strptime(
                        t,
                        "%I{} %p".format(":%M" if ":" in t else "")
                    )
                    for t in time_range.split(" - ")
                )
                runs_late = stop_time == midnight or stop_time < start_time
                for day in days:
                    if not runs_late:
                        shifts[day].append((name, start_time, stop_time))
                        continue
                    # the part worked on the starting day lasts until the
                    # day is over
                    shifts[day].append((name, start_time, end_of_day))
                    if stop_time > midnight:
                        # the remainder is worked on the next day; Sunday
                        # spills over into Monday
                        shifts[(day + 1) % 7].append(
                            (name, midnight, stop_time)
                        )
    search_time = search_dt.replace(year=1900, month=1, day=1)
    return [
        name
        for (name, start, stop) in shifts[search_dt.weekday()]
        if start <= search_time <= stop
    ]
"""
This is the version I originally tried to write during the interview.
It is not the most efficient or concise version, but I thought it was
more readable and would make it easier to handle multiple TZ
(since in the original context, the shift workers IRL are distributed
across call centers around the globe)
"""
from typing import List, Tuple, Iterator
from datetime import datetime as dt, timedelta as delta
import csv
import pdb
DAY_NAMES = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]  # type: List[str]
TimeRange = Tuple[dt, dt]


def parse_shift_range(shift_range: str, epoch: dt) -> Iterator[TimeRange]:
    """Yield one (start, stop) datetime pair per day covered by a shift range.

    shift_range looks like "Mon-Tue 11:30 am - 7 pm"; the day part may wrap
    around the week ("Sat-Mon") or name a single day ("Wed").
    epoch is the midnight of the monday of the week we're talking about.

    >>> for t1, t2 in parse_shift_range("Mon-Tue 11:30 am - 7 pm", dt(2019, 5, 6)):
    ...     print(t1.isoformat(), t2.isoformat())
    2019-05-06T11:30:00 2019-05-06T19:00:00
    2019-05-07T11:30:00 2019-05-07T19:00:00
    """
    days, times = shift_range.split(" ", 1)
    day_indexes = [DAY_NAMES.index(day) for day in days.split("-")]
    # a single day is a range that starts and stops on the same day
    start_day, stop_day = day_indexes[0], day_indexes[-1]
    start, stop = (
        dt.strptime(
            t,
            "%I{} %p".format(":%M" if ":" in t else "")
        ) - dt(1900, 1, 1)  # strptime sets Jan 1 1900 as the default date
        for t in times.split(" - ")
    )
    # strptime reads "12 am" as the midnight that begins the day, but in
    # "5 pm - 12 am" it is meant as the midnight that ends it.  More
    # generally, a stop earlier than the start ("5 pm - 12:30 am") means the
    # shift ends on the following day.
    if stop < start:
        stop += delta(1)
    if start_day <= stop_day:
        offsets = [*range(start_day, stop_day + 1)]
    else:
        offsets = [*range(0, stop_day + 1), *range(start_day, 7)]
    for offset in offsets:
        date = epoch + delta(offset)
        yield (date + start, date + stop)
def find_shift_agents(csv_filename: str, search_dt: dt) -> List[str]:
    """Return the names of the agents whose shift contains search_dt.

    Every shift range in the csv is expanded into concrete (start, stop)
    datetimes with parse_shift_range, and search_dt is compared against
    them with both ends inclusive.  Names come back in csv row order.
    """
    # e.g. search_dt is 2019-05-10T11:41:46
    # take date (2019-05-10), subtract weekday offset (Mon=0, Sun=6)
    # as number of days to go back in time
    epoch_date = search_dt.date() - delta(search_dt.weekday())
    # then revert to dt for 0:00
    epoch = dt(epoch_date.year, epoch_date.month, epoch_date.day)
    # A shift that starts on Sunday can end at (or after) the Monday
    # midnight that opens the next week, so the week before the search week
    # has to be expanded as well.
    epochs = [epoch]
    try:
        epochs.insert(0, epoch - delta(weeks=1))
    except OverflowError:
        # search_dt is in the first representable week; nothing precedes it
        pass
    working_agents = []  # type: List[str]
    with open(csv_filename, newline="") as f:
        for (name, shift_ranges) in csv.reader(f):
            for shift_range in shift_ranges.split(" / "):
                for week in epochs:
                    for (start, stop) in parse_shift_range(shift_range, week):
                        if start <= search_dt <= stop:
                            working_agents.append(name)
    return working_agents
"""
This is the most efficient version of this code, also the most concise. It makes exactly one pass.
It uses some more complicated boolean logic for the date ranges, which is somewhat less readable.
Like the version the interviewer seemed to want,
it would be difficult for it to handle shifts in multiple TZ
"""
import csv
from datetime import time, datetime
from typing import Iterator
from dateutil.parser import parse
# maps a day abbreviation to its weekday number (Mon=0 ... Sun=6);
# raises ValueError for anything else
parse_date = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"].index


def parse_time(t: str) -> time:
    """Return the time of day that dateutil's parser reads from t."""
    parsed = parse(t)
    return parsed.time()
def _day_in_range(day: int, first: int, last: int) -> bool:
    """Tell whether weekday `day` lies in first..last, wrapping past Sunday."""
    if first <= last:
        return first <= day <= last
    return day >= first or day <= last


def find_shift_agents(csv_filename: str, search_dt: datetime) -> Iterator[str]:
    """Yield the name of every agent who is on shift at search_dt.

    Rows are read lazily in a single pass; a name is yielded once for each
    of its shift ranges that contains search_dt.  Start and stop times are
    inclusive.  A stop of "12 am" means the end of the starting day, and a
    stop earlier than the start ("5 pm - 12:30 am") means the shift carries
    on into the following day.  A day range may wrap around the week
    ("Sat-Mon") or name a single day ("Wed").
    """
    midnight = time(0)
    today, search_time = search_dt.weekday(), search_dt.time()
    yesterday = (today - 1) % 7
    # the with-block closes the file once the generator finishes or is closed
    with open(csv_filename, newline="") as f:
        for name, shift_ranges in csv.reader(f):
            for shift_range in shift_ranges.split("/"):
                date_range, time_range = (
                    r.split("-") for r in shift_range.strip().split(" ", 1)
                )
                day_indexes = [parse_date(day) for day in date_range]
                start_date, stop_date = day_indexes[0], day_indexes[-1]
                start_time, stop_time = map(parse_time, time_range)
                runs_late = stop_time == midnight or stop_time < start_time
                started_today = (
                    _day_in_range(today, start_date, stop_date)
                    and start_time <= search_time
                    and (runs_late or search_time <= stop_time)
                )
                # the tail end of a shift that began yesterday evening
                spilled_over = (
                    midnight < stop_time < start_time
                    and _day_in_range(yesterday, start_date, stop_date)
                    and search_time <= stop_time
                )
                if started_today or spilled_over:
                    yield name
import os
import csv
import pdb
from importlib import import_module
from datetime import datetime as dt
from typing import List, NamedTuple, Tuple, Callable, Iterator
import pytest
import mypy.api
# first, testing individual components
from parse_shifts_1_clean import parse_shift_range
def test_parse_range():
    """A two-day range expands into one (start, stop) pair per day."""
    monday = dt(2019, 5, 6)
    intervals = parse_shift_range("Mon-Tue 11:30 am - 7 pm", monday)
    observed = [
        (begin.isoformat(), end.isoformat()) for (begin, end) in intervals
    ]
    expected = [
        ("2019-05-06T11:30:00", "2019-05-06T19:00:00"),
        ("2019-05-07T11:30:00", "2019-05-07T19:00:00")
    ]
    assert observed == expected
# second, combinatorial testing
# each implementation should do the same thing
# and be discovered regardless of exact name
# (sorted, because os.listdir returns entries in arbitrary order and the
# parametrized tests should run in the same order every time)
IMPL_FNAMES = sorted(
    fname
    for fname in os.listdir(".")
    if fname.startswith("parse_shifts") and fname.endswith(".py")
)
# the module name is the file name up to its first dot
IMPL_MODULES = [
    import_module(fname.split(".")[0])
    for fname in IMPL_FNAMES
]
def exhaust(iterator: Callable) -> Callable:
    """
    Wrap a function so that whatever iterable it returns is collected into
    a list.

    generator functions can't be tested as-is, we have to collect the results
    before we'll be able to make assertions about them.  Despite its name,
    `iterator` is the function that produces the iterable, not an iterator
    itself; positional and keyword arguments are passed straight through.
    """
    def exhausted_iterator(*args, **kwargs):
        return list(iterator(*args, **kwargs))
    return exhausted_iterator
# one list-returning wrapper per discovered module that actually defines
# find_shift_agents
IMPLS = [
    exhaust(candidate.find_shift_agents)
    for candidate in IMPL_MODULES
    if "find_shift_agents" in dir(candidate)
]


# pytest is slightly counterintuitive:
# it discovers functions like def test_whatever(impl): ... and runs each of
# them with the impl argument set to whatever the fixture function named
# impl returns.
# because of params=, pytest repeats the test once for every item in IMPLS
# and exposes the current item as request.param.
# personally, i feel like there's a way to do this that doesn't make pylint
# complain about the test function redefining impl from outer scope
@pytest.fixture(params=IMPLS)
def impl(request) -> Callable:
    """Hand the current find_shift_agents wrapper to the test."""
    implementation = request.param
    return implementation
# (csv file name, rows to write into it) pairs; every file holds a single
# row for John with a different schedule
FILES = [
    (csv_name, [["John", schedule]])
    for (csv_name, schedule) in (
        ("baseline.csv", "Mon-Fri 9 am - 5 pm"),
        ("times.csv", "Mon-Fri 9 am - 4:59 pm"),
        ("days.csv", "Sun-Fri 9 am - 5 pm"),
        ("multiple.csv", "Mon-Wed 9 am - 4:59 pm / Thu-Fri 9 am - 4 pm"),
    )
]
# the neat thing about this is that your setup and teardown can be
# trivially combinatorial
@pytest.fixture(params=FILES)
def fname(request) -> Iterator[str]:
    """Write one of the FILES to the working directory for a single test.

    Yields the file name and removes the file again once the test is done.
    """
    name, rows = request.param
    # the csv module asks for newline="" on files it writes to; the
    # with-block closes the file, so no explicit close() is needed
    with open(name, "w", newline="") as f:
        csv.writer(f).writerows(rows)
    try:
        yield name
    finally:
        os.remove(name)
# a named lookup: the moment to search for and the agents expected back
Case = NamedTuple(
    "Case",
    [("name", str), ("search_dt", dt), ("expected_agents", List[str])],
)
CASES = [
    Case(name="saturday", search_dt=dt(2019, 5, 11, 12), expected_agents=[]),
    # the next three are all on Tuesday 2019-05-14
    Case(name="before", search_dt=dt(2019, 5, 14, 8, 59), expected_agents=[]),
    Case(name="after", search_dt=dt(2019, 5, 14, 17, 1), expected_agents=[]),
    Case(name="simple", search_dt=dt(2019, 5, 14, 12), expected_agents=["John"]),
    # years far away from the present
    Case(name="past", search_dt=dt(100, 1, 7, 12), expected_agents=["John"]),
    Case(name="future", search_dt=dt(3666, 1, 5, 12), expected_agents=["John"]),
]
@pytest.fixture(params=CASES)
def case(request) -> Case:
    """Hand the current entry of CASES to the test (one run per entry)."""
    return request.param
def test_generated_cases(fname: str, impl: Callable, case: Case):
    """Every implementation gives the expected agents for every file/case pair.

    There is deliberately no pdb.set_trace() here: a breakpoint inside the
    test breaks non-interactive runs.  Run `pytest --pdb` to get a debugger
    at the point of failure instead.
    """
    actual = impl(fname, case.search_dt)
    assert actual == case.expected_agents, (
        "{} / {}: expected {!r}, got {!r}".format(
            fname, case.name, case.expected_agents, actual
        )
    )
# third, test using the provided datasets
# here the name of a Case is the schedule file that gets searched
EXISTING_CASES = [
    Case(
        name="shift_schedule1.csv",
        search_dt=dt(2019, 5, 12, 11, 39),
        expected_agents=[
            "Patrice Perkinson", "Keshia Brutus",
            "Alesha Goggans", "Christiane Mcferron",
            "Yasmin Najera", "Glennis Gaunce",
            "Nam Gallego", "Cindi Dugas",
            "Anya Towe", "Mignon Ritzer",
            "Angie Mannion", "Lupita Bolden",
            "Carla Hicks", "Mauro Freeze",
            "Broderick Mandel", "Rex Kirsh",
            "Pierre Pedrosa", "Carmon Hazen",
            "Brigida O'Leary", "Sylvia Delozier",
        ],
    ),
    Case(
        name="shift_schedule2.csv",
        search_dt=dt(2019, 5, 12, 11, 39),
        expected_agents=[
            "Patrice Perkinson", "Keshia Brutus",
            "Denny Twedt", "Alesha Goggans",
            "Jarrod Zacarias", "Elba Hawks",
            "Christiane Mcferron", "Yasmin Najera",
            "Glennis Gaunce", "Penni Pflum",
            "Nam Gallego", "Cindi Dugas",
            "Anya Towe", "Mignon Ritzer",
            "Angie Mannion", "Lupita Bolden",
            "Carla Hicks", "Mauro Freeze",
            "Broderick Mandel", "Rex Kirsh",
            "Pierre Pedrosa", "Carmon Hazen",
            "Natisha Cuesta", "Brigida O'Leary",
            "Sylvia Delozier", "Cynthia Smith",
        ],
    ),
]


@pytest.fixture(params=EXISTING_CASES)
def existing_case(request) -> Case:
    """Hand the current entry of EXISTING_CASES to the test."""
    current = request.param
    return current
def test_existing_cases(impl, existing_case):
    """Every implementation gives the expected agents for the provided schedules.

    existing_case.name is the csv file handed to the implementation.  There
    is deliberately no pdb.set_trace() here: a breakpoint inside the test
    breaks non-interactive runs.  Run `pytest --pdb` to get a debugger at
    the point of failure instead.
    """
    actual = impl(existing_case.name, existing_case.search_dt)
    assert actual == existing_case.expected_agents, (
        "{}: expected {!r}, got {!r}".format(
            existing_case.name, existing_case.expected_agents, actual
        )
    )
if __name__ == "__main__":
    import sys

    # type-check the implementations first, then run the tests; the process
    # exit status reports a failure of either step
    print("Checking types")
    mypy_stdout, mypy_stderr, mypy_status = mypy.api.run(list(IMPL_FNAMES))
    if mypy_stdout:
        print("\nType checking report:\n")
        print(mypy_stdout)  # stdout
    if mypy_stderr:
        print("\nType error report:\n")
        print(mypy_stderr)  # stderr
    print("Checking unittests")
    pytest_status = pytest.main()
    sys.exit(int(pytest_status) or mypy_status)
Patrice Perkinson Mon-Sun 11 am - 9 pm
Aleshia Ament Fri-Sat 11 am - 9 pm
Adalberto Atchison Mon-Thu 11 am - 10 pm
Mathew Knauf Mon-Thu 11 am - 10 pm
Keshia Brutus Mon-Sun 11 am - 10 pm
Britt Cottman Mon-Thu 11 am - 11 pm
Laurel Boulware Mon-Sat 11 am - 10 pm
Lessie Filip Mon-Thu 11 am - 9 pm
Denny Twedt Mon-Thu 11 am - 10 pm
Alesha Goggans Mon-Sun 11 am - 11 pm
Jarrod Zacarias Mon-Fri 10 am - 9 pm
Elba Hawks Mon-Fri 10 am - 9 pm
Christiane Mcferron Mon-Sun 11 am - 10 pm
Alejandrina Schaper Mon-Fri 11 am - 9 pm
Bert Bodin Mon-Thu 11 am - 10 pm
Allene Raposa Mon-Thu 11 am - 9 pm
Roma Lamson Mon-Wed 5 pm - 12 am
Yasmin Najera Mon-Sun 11 am - 12 am
Sachiko Chard Mon-Thu 9 am - 10 pm
Twanna Wales Mon-Thu 11 am - 10 pm
Robbin Elfrink Mon-Sat 11 am - 10 pm
Glennis Gaunce Mon-Sun 11 am - 10 pm
Edith Dieter Mon-Sat 11 am - 12 am
Gale Pummill Mon-Thu 11 am - 10 pm
Penni Pflum Mon-Fri 11 am - 10 pm
Nam Gallego Mon-Sun 11 am - 4 pm
Cindi Dugas Mon-Sun 11 am - 9 pm
Alexis Flemming Mon-Thu 11 am - 10 pm
Anya Towe Mon-Sun 11 am - 9 pm
Mignon Ritzer Mon-Sun 11 am - 12 am
Angie Mannion Mon-Sun 11 am - 10 pm
Lupita Bolden Mon-Sun 11 am - 10 pm
Theda Jeske Mon-Sat 11 am - 11 pm
Carla Hicks Mon-Sun 11 am - 11 pm
Mauro Freeze Wed-Sun 11 am - 10 pm
Broderick Mandel Mon-Sun 11 am - 10 pm
Tobie Nave Mon-Fri 11 am - 10 pm
Edwina Marek Mon-Fri 11 am - 10 pm
Rex Kirsh Mon-Sun 11 am - 10 pm
Pierre Pedrosa Mon-Sun 11 am - 11 pm
Carmon Hazen Mon-Sun 11 am - 1 pm
Alisha Sthilaire Mon-Thu 11 am - 10 pm
Arletta Berenbaum Mon-Fri 12 pm - 10 pm
Kera Coutts Mon-Thu 10 am - 10 pm
Josefa Saul Mon-Fri 11 am - 10 pm
Natisha Cuesta Mon-Fri 9 am - 9 pm
Brigida O'Leary Mon-Sun 10 am - 11 pm
Jolyn Fullenwider Mon-Sat 11 am - 11 pm
Sylvia Delozier Mon-Sun 8 am - 10 pm
Kurt Kodin Mon-Thu 11 am - 10 pm
Cynthia Smith Mon-Sun 1 pm - 2 pm
Patrice Perkinson Mon-Sun 11:30 am - 9 pm
Aleshia Ament Mon-Thu 11:30 am - 9 pm / Fri-Sat 11:30 am - 9:30 pm
Adalberto Atchison Mon-Thu 11:30 am - 10 pm / Fri-Sat 11:30 am - 11 pm
Mathew Knauf Mon-Thu 11:30 am - 10 pm / Fri-Sat 11:30 am - 11 pm
Keshia Brutus Mon-Sun 11 am - 10 pm
Britt Cottman Mon-Thu 11 am - 11 pm / Fri-Sat 11 am - 12:30 am
Laurel Boulware Mon-Sat 11:30 am - 10 pm
Lessie Filip Mon-Thu 11:30 am - 9:30 pm / Fri-Sat 11:30 am - 10 pm
Denny Twedt Mon-Thu 11:30 am - 10 pm / Fri-Sun 11:30 am - 11 pm
Alesha Goggans Mon-Sun 11 am - 11 pm
Jarrod Zacarias Mon-Fri 10:30 am - 9:30 pm / Sat-Sun 10 am - 9:30 pm
Elba Hawks Mon-Fri 10 am - 9:30 pm / Sat-Sun 9:30 am - 9:30 pm
Christiane Mcferron Mon-Sun 11:30 am - 10:30 pm
Alejandrina Schaper Mon-Fri 11 am - 9 pm
Bert Bodin Mon-Thu 11 am - 10 pm / Fri-Sat 11 am - 11 pm
Allene Raposa Mon-Thu 11:30 am - 9 pm / Fri-Sat 11:30 am - 10 pm
Roma Lamson Mon-Wed 5 pm - 12:30 am / Thu-Fri 5 pm - 1:30 am
Yasmin Najera Mon-Sun 11 am - 12 am
Sachiko Chard Mon-Thu 9 am - 10 pm / Fri-Sat 9 am - 11 pm
Twanna Wales Mon-Thu 11 am - 10 pm / Fri-Sat 10 am - 10:30 pm
Robbin Elfrink Mon-Sat 11 am - 10 pm
Glennis Gaunce Mon-Sun 11 am - 10 pm
Edith Dieter Mon-Sat 11 am - 12 am
Gale Pummill Mon-Thu 11 am - 10 pm / Fri-Sat 11 am - 12 am
Penni Pflum Mon-Fri 11:30 am - 10 pm / Sat-Sun 7 am - 3 pm
Nam Gallego Mon-Sun 11 am - 4 pm
Cindi Dugas Mon-Sun 11 am - 9:30 pm
Alexis Flemming Mon-Thu 11:30 am - 10 pm / Fri-Sat 11:30 am - 11 pm
Anya Towe Mon-Sun 11:30 am - 9:30 pm
Mignon Ritzer Mon-Sun 11 am - 12 am
Angie Mannion Mon-Sun 11:30 am - 10 pm
Lupita Bolden Mon-Sun 11:30 am - 10 pm
Theda Jeske Mon-Sat 11 am - 11 pm
Carla Hicks Mon-Sun 11 am - 11 pm
Mauro Freeze Mon-Sun 11 am - 10 pm
Broderick Mandel Mon-Sun 11 am - 10:30 pm
Tobie Nave Mon-Fri 11:30 am - 10 pm
Edwina Marek Mon-Fri 11 am - 10 pm / Sat-Sun 5 pm - 10 pm
Rex Kirsh Mon-Sun 11 am - 10:30 pm
Pierre Pedrosa Mon-Sun 11 am - 11 pm
Carmon Hazen Mon-Sun 11 am - 1 pm
Alisha Sthilaire Mon-Thu 11:30 am - 10 pm / Fri-Sat 11:30 am - 10:30 pm
Arletta Berenbaum Mon-Fri 12 pm - 10 pm / Sat-Sun 5 pm - 10 pm
Kera Coutts Mon-Thu 10 am - 10:30 pm / Fri-Sat 10 am - 12:30 am
Josefa Saul Mon-Fri 11:30 am - 10 pm
Natisha Cuesta Mon-Fri 9 am - 9:30 pm / Sat-Sun 9 am - 10 pm
Brigida O'Leary Mon-Sun 10 am - 11 pm
Jolyn Fullenwider Mon-Sat 11 am - 11 pm
Sylvia Delozier Mon-Sun 8 am - 10:30 pm
Kurt Kodin Mon-Thu 11 am - 10:30 pm
Cynthia Smith Mon-Sun 5:30 am - 2 pm
import os
import csv
import pdb
from importlib import import_module
from datetime import datetime as dt
from typing import List, NamedTuple, Tuple, Callable, Iterator
import pytest
import mypy.api
# first, testing individual components
from parse_shifts_1_clean import parse_shift_range
def test_parse_range():
    """A two-day range expands into one (start, stop) pair per day."""
    monday = dt(2019, 5, 6)
    intervals = parse_shift_range("Mon-Tue 11:30 am - 7 pm", monday)
    observed = [
        (begin.isoformat(), end.isoformat()) for (begin, end) in intervals
    ]
    expected = [
        ("2019-05-06T11:30:00", "2019-05-06T19:00:00"),
        ("2019-05-07T11:30:00", "2019-05-07T19:00:00")
    ]
    assert observed == expected
# second, combinatorial testing
# each implementation should do the same thing
# and be discovered regardless of exact name
# (sorted, because os.listdir returns entries in arbitrary order and the
# parametrized tests should run in the same order every time)
IMPL_FNAMES = sorted(
    fname
    for fname in os.listdir(".")
    if fname.startswith("parse_shifts") and fname.endswith(".py")
)
# the module name is the file name up to its first dot
IMPL_MODULES = [
    import_module(fname.split(".")[0])
    for fname in IMPL_FNAMES
]
def exhaust(iterator: Callable) -> Callable:
    """
    Wrap a function so that whatever iterable it returns is collected into
    a list.

    generator functions can't be tested as-is, we have to collect the results
    before we'll be able to make assertions about them.  Despite its name,
    `iterator` is the function that produces the iterable, not an iterator
    itself; positional and keyword arguments are passed straight through.
    """
    def exhausted_iterator(*args, **kwargs):
        return list(iterator(*args, **kwargs))
    return exhausted_iterator
# one list-returning wrapper per discovered module that actually defines
# find_shift_agents
IMPLS = [
    exhaust(candidate.find_shift_agents)
    for candidate in IMPL_MODULES
    if "find_shift_agents" in dir(candidate)
]


# pytest is slightly counterintuitive:
# it discovers functions like def test_whatever(impl): ... and runs each of
# them with the impl argument set to whatever the fixture function named
# impl returns.
# because of params=, pytest repeats the test once for every item in IMPLS
# and exposes the current item as request.param.
# personally, i feel like there's a way to do this that doesn't make pylint
# complain about the test function redefining impl from outer scope
@pytest.fixture(params=IMPLS)
def impl(request) -> Callable:
    """Hand the current find_shift_agents wrapper to the test."""
    implementation = request.param
    return implementation
# (csv file name, rows to write into it) pairs; every file holds a single
# row for John with a different schedule
FILES = [
    (csv_name, [["John", schedule]])
    for (csv_name, schedule) in (
        ("baseline.csv", "Mon-Fri 9 am - 5 pm"),
        ("times.csv", "Mon-Fri 9 am - 4:59 pm"),
        ("days.csv", "Sun-Fri 9 am - 5 pm"),
        ("multiple.csv", "Mon-Wed 9 am - 4:59 pm / Thu-Fri 9 am - 4 pm"),
    )
]
# the neat thing about this is that your setup and teardown can be
# trivially combinatorial
@pytest.fixture(params=FILES)
def fname(request) -> Iterator[str]:
    """Write one of the FILES to the working directory for a single test.

    Yields the file name and removes the file again once the test is done.
    """
    name, rows = request.param
    # the csv module asks for newline="" on files it writes to; the
    # with-block closes the file, so no explicit close() is needed
    with open(name, "w", newline="") as f:
        csv.writer(f).writerows(rows)
    try:
        yield name
    finally:
        os.remove(name)
# a named lookup: the moment to search for and the agents expected back
Case = NamedTuple(
    "Case",
    [("name", str), ("search_dt", dt), ("expected_agents", List[str])],
)
CASES = [
    Case(name="saturday", search_dt=dt(2019, 5, 11, 12), expected_agents=[]),
    # the next three are all on Tuesday 2019-05-14
    Case(name="before", search_dt=dt(2019, 5, 14, 8, 59), expected_agents=[]),
    Case(name="after", search_dt=dt(2019, 5, 14, 17, 1), expected_agents=[]),
    Case(name="simple", search_dt=dt(2019, 5, 14, 12), expected_agents=["John"]),
    # years far away from the present
    Case(name="past", search_dt=dt(100, 1, 7, 12), expected_agents=["John"]),
    Case(name="future", search_dt=dt(3666, 1, 5, 12), expected_agents=["John"]),
]
@pytest.fixture(params=CASES)
def case(request) -> Case:
    """Hand the current entry of CASES to the test (one run per entry)."""
    return request.param
def test_generated_cases(fname: str, impl: Callable, case: Case):
    """Every implementation gives the expected agents for every file/case pair.

    There is deliberately no pdb.set_trace() here: a breakpoint inside the
    test breaks non-interactive runs.  Run `pytest --pdb` to get a debugger
    at the point of failure instead.
    """
    actual = impl(fname, case.search_dt)
    assert actual == case.expected_agents, (
        "{} / {}: expected {!r}, got {!r}".format(
            fname, case.name, case.expected_agents, actual
        )
    )
# third, test using the provided datasets
# here the name of a Case is the schedule file that gets searched
EXISTING_CASES = [
    Case(
        name="shift_schedule1.csv",
        search_dt=dt(2019, 5, 12, 11, 39),
        expected_agents=[
            "Patrice Perkinson", "Keshia Brutus",
            "Alesha Goggans", "Christiane Mcferron",
            "Yasmin Najera", "Glennis Gaunce",
            "Nam Gallego", "Cindi Dugas",
            "Anya Towe", "Mignon Ritzer",
            "Angie Mannion", "Lupita Bolden",
            "Carla Hicks", "Mauro Freeze",
            "Broderick Mandel", "Rex Kirsh",
            "Pierre Pedrosa", "Carmon Hazen",
            "Brigida O'Leary", "Sylvia Delozier",
        ],
    ),
    Case(
        name="shift_schedule2.csv",
        search_dt=dt(2019, 5, 12, 11, 39),
        expected_agents=[
            "Patrice Perkinson", "Keshia Brutus",
            "Denny Twedt", "Alesha Goggans",
            "Jarrod Zacarias", "Elba Hawks",
            "Christiane Mcferron", "Yasmin Najera",
            "Glennis Gaunce", "Penni Pflum",
            "Nam Gallego", "Cindi Dugas",
            "Anya Towe", "Mignon Ritzer",
            "Angie Mannion", "Lupita Bolden",
            "Carla Hicks", "Mauro Freeze",
            "Broderick Mandel", "Rex Kirsh",
            "Pierre Pedrosa", "Carmon Hazen",
            "Natisha Cuesta", "Brigida O'Leary",
            "Sylvia Delozier", "Cynthia Smith",
        ],
    ),
]


@pytest.fixture(params=EXISTING_CASES)
def existing_case(request) -> Case:
    """Hand the current entry of EXISTING_CASES to the test."""
    current = request.param
    return current
def test_existing_cases(impl, existing_case):
    """Every implementation gives the expected agents for the provided schedules.

    existing_case.name is the csv file handed to the implementation.  There
    is deliberately no pdb.set_trace() here: a breakpoint inside the test
    breaks non-interactive runs.  Run `pytest --pdb` to get a debugger at
    the point of failure instead.
    """
    actual = impl(existing_case.name, existing_case.search_dt)
    assert actual == existing_case.expected_agents, (
        "{}: expected {!r}, got {!r}".format(
            existing_case.name, existing_case.expected_agents, actual
        )
    )
if __name__ == "__main__":
    import sys

    # type-check the implementations first, then run the tests; the process
    # exit status reports a failure of either step
    print("Checking types")
    mypy_stdout, mypy_stderr, mypy_status = mypy.api.run(list(IMPL_FNAMES))
    if mypy_stdout:
        print("\nType checking report:\n")
        print(mypy_stdout)  # stdout
    if mypy_stderr:
        print("\nType error report:\n")
        print(mypy_stderr)  # stderr
    print("Checking unittests")
    pytest_status = pytest.main()
    sys.exit(int(pytest_status) or mypy_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment