Last active
May 8, 2020 20:13
-
-
Save pyrofolium/a8dbb1c26bd6dcb451bfd26a2b29e8ed to your computer and use it in GitHub Desktop.
triplets.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Given a log file of user_id, timestamp and page visited, find the 10 most common triplets, where a triplet is an occurrence of 3 pages visited sequentially by the same user. | |
| 1, 1, /home | |
| 1, 2, /shipments | |
| 1, 3, /labels | |
| 1, 4, /something | |
| 1, 5, /something | |
| 2, 4, /home | |
| ... | |
| A triplet is a list of 3 pages visited in order by the same user. | |
| """ | |
# Sample input for the demo call to analyze_log() further down. Each row is
# "user_id, timestamp, page" and every row, including the last, ends with a
# newline.
log = (
    "1, 1, /home\n"
    "1, 2, /shipments\n"
    "1, 3, /labels\n"
    "1, 4, /something\n"
    "1, 5, /something\n"
    "2, 4, /home\n"
)
from collections import Counter, defaultdict, deque
from typing import Tuple, List
def parse_line(log_line: str) -> Tuple[int, int, str]:
    """Parse one ``user_id, timestamp, page`` log line.

    Args:
        log_line: A comma-separated record such as ``"1, 2, /shipments"``.
            Whitespace around each field is ignored.

    Returns:
        A ``(user_id, timestamp, page)`` tuple. The first two fields are
        converted to ``int``; the page is returned as a stripped string.

    Raises:
        ValueError: If the user id or timestamp is not an integer.
        IndexError: If the line has fewer than three fields.
    """
    # Split on the first two commas only, so a page that itself contains a
    # comma (e.g. "/search?tags=a,b") is kept whole instead of being
    # truncated at its first comma.
    fields = [field.strip() for field in log_line.split(',', 2)]
    return (int(fields[0]), int(fields[1]), fields[2])
def analyze_log(log_string: str) -> List[Tuple[str, str, str]]:
    """Return the 10 most common page triplets found in a visit log.

    A triplet is three pages visited consecutively by the same user. Lines
    are processed in the order they appear in ``log_string``; the timestamp
    field is parsed but not used for ordering, so the log is assumed to be
    in visit order already.

    Args:
        log_string: Newline-separated ``user_id, timestamp, page`` records.
            Blank lines are ignored.

    Returns:
        Up to 10 ``(page, page, page)`` tuples, most frequent first.
        Triplets with equal counts keep the order in which they were first
        seen. An empty list is returned when no user has three visits.

    Raises:
        ValueError, IndexError: Propagated from ``parse_line`` for a
            malformed, non-blank line.
    """
    # {user: the user's most recent pages, capped at 3}; only the last three
    # visits are ever needed to form the next triplet.
    recent_pages_by_user = defaultdict(lambda: deque(maxlen=3))
    triplets_count = Counter()  # {(page, page, page): count}

    for raw_line in log_string.splitlines():
        if not raw_line.strip():
            continue  # tolerate blank or whitespace-only lines
        user, _timestamp, page = parse_line(raw_line)
        window = recent_pages_by_user[user]
        window.append(page)
        if len(window) == 3:
            triplets_count[tuple(window)] += 1

    # most_common() orders by descending count and keeps first-seen order
    # among ties; it also copes with an empty counter (no triplets at all).
    return [triplet for triplet, _count in triplets_count.most_common(10)]
# Demo: print the most common triplets found in the sample log above.
print(analyze_log(log_string=log))
def say_hello():
    """Print the greeting ``Hello, World`` to standard output."""
    greeting = 'Hello, World'
    print(greeting)
# Demo: print the greeting five times. The loop index is not used, so it is
# named "_" by convention.
for _ in range(5):
    say_hello()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment