Created
November 30, 2021 00:11
-
-
Save jfreels/d53ec92784193ee757c86117a4ef3ae6 to your computer and use it in GitHub Desktop.
Determine average duration for each step across all sessions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Determine average duration for each step across all sessions """ | |
import sys | |
# Sample event stream used by main(). Rows are listed in arrival order and
# each row is [session_id, step, timestamp]. The trailing comment on a row
# gives the duration credited to that row's step, i.e. the time until the
# same session first reaches the next step.
STREAM = [
    # session, step, timestamp
    [1001, 1, 100000010],  # duration = 11
    [1001, 2, 100000021],  # duration = 12
    [1001, 3, 100000033],  # duration = 13
    [1001, 4, 100000046],  # duration = None (there's no next step)
    [1002, 1, 100000010],  # duration = 10
    [1002, 2, 100000020],  # duration = None (there's no next session+step)
    [1002, 2, 100000030],  # duration = None (this is not the first occurrence of this session+step)
]
# for any given session+step combo, keep only the first occurrence
# Expected average duration per step across all sessions in STREAM:
#   step 1: (11 + 10) / 2 = 10.5   (sessions 1001 and 1002)
#   step 2: 12 / 1 = 12.0          (session 1001 only)
#   step 3: 13 / 1 = 13.0          (session 1001 only)
EXPECTED_RESULTS = {
    1: 10.5,
    2: 12.0,
    3: 13.0,
}
def _collect_step_metrics(stream, last_step=4):
    """Count, per step, the sessions that moved on and the time they took."""
    step_metrics = {
        step: {
            "total_converted_sessions": 0,
            "total_converted_duration": 0,
        }
        for step in range(1, last_step)
    }

    # Per session: the furthest step recorded so far and the timestamp of its
    # first occurrence.
    last_seen = {}

    for row in stream:
        session_id, step, timestamp = row[0], row[1], row[2]
        previous = last_seen.get(session_id)

        # First row for this session: just remember it.
        if previous is None:
            last_seen[session_id] = {"step": step, "timestamp": timestamp}
            continue

        previous_step = previous["step"]

        # Repeated or earlier step: only the first occurrence counts.
        if step <= previous_step:
            continue

        # Credit the step the session just left. If the session skipped a
        # step, the skipped step was never observed, so nothing is credited.
        if step == previous_step + 1 and previous_step in step_metrics:
            totals = step_metrics[previous_step]
            totals["total_converted_sessions"] += 1
            totals["total_converted_duration"] += timestamp - previous["timestamp"]

        if step < last_step:
            last_seen[session_id] = {"step": step, "timestamp": timestamp}
        else:
            # The last step has no successor, so the session is finished.
            del last_seen[session_id]

    return step_metrics


def _average_step_durations(step_metrics):
    """Turn per-step totals into averages; None where no session converted."""
    averages = {}
    for step, totals in step_metrics.items():
        sessions = totals["total_converted_sessions"]
        if sessions:
            averages[step] = totals["total_converted_duration"] / sessions
        else:
            # Avoid ZeroDivisionError when a step has no conversions.
            averages[step] = None
    return averages


def main(stream=None, expected=None):
    """Compute the average duration of each step and verify the result.

    A step's duration for a session is the time between the first occurrence
    of that step and the first occurrence of the next step in the same
    session. Both the accumulated totals and the averages are printed.

    Args:
        stream: Rows of ``[session_id, step, timestamp]`` in arrival order.
            Defaults to the module-level ``STREAM``.
        expected: Mapping of step to expected average duration. Defaults to
            the module-level ``EXPECTED_RESULTS``.

    Raises:
        AssertionError: If the computed averages differ from ``expected``.
    """
    if stream is None:
        stream = STREAM
    if expected is None:
        expected = EXPECTED_RESULTS

    step_metrics = _collect_step_metrics(stream)
    print(step_metrics)

    avg_step_durations = _average_step_durations(step_metrics)
    print(avg_step_durations)

    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would let a wrong result pass silently.
    if avg_step_durations != expected:
        raise AssertionError(
            "expected {!r}, got {!r}".format(expected, avg_step_durations)
        )
# Run the self-check only when executed as a script. main() returns None on
# success, so the process exits with status 0; a failed check raises instead.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment