Skip to content

Instantly share code, notes, and snippets.

@pyrofolium
Last active May 8, 2020 20:13
Show Gist options
  • Select an option

  • Save pyrofolium/a8dbb1c26bd6dcb451bfd26a2b29e8ed to your computer and use it in GitHub Desktop.

Select an option

Save pyrofolium/a8dbb1c26bd6dcb451bfd26a2b29e8ed to your computer and use it in GitHub Desktop.
triplets.py
"""
Given a log file of user_id, timestamp, and page visited, find the 10 most common triplets, where a triplet is an occurrence of 3 pages visited sequentially by the same user.
1, 1, /home
1, 2, /shipments
1, 3, /labels
1, 4, /something
1, 5, /something
2, 4, /home
...
A triplet is a list of 3 pages visited in order by the same user.
"""
# Sample visit log: one "user_id, timestamp, page" record per line,
# each line terminated by a newline.
log = (
    "1, 1, /home\n"
    "1, 2, /shipments\n"
    "1, 3, /labels\n"
    "1, 4, /something\n"
    "1, 5, /something\n"
    "2, 4, /home\n"
)
from typing import Tuple, List
def parse_line(log_line: str) -> Tuple[int, int, str]:
    """Parse one ``user_id, timestamp, page`` log record.

    Args:
        log_line: A single comma-separated record such as ``"1, 2, /home"``.
            Whitespace around each field is ignored.

    Returns:
        A ``(user_id, timestamp, page)`` tuple with the first two fields
        converted to ``int`` and the page kept as a string.

    Raises:
        ValueError: If the user id or timestamp is not an integer.
        IndexError: If the record has fewer than three fields.
    """
    # Split on the first two commas only, so a page that itself contains a
    # comma (e.g. a query string) is kept whole instead of being truncated.
    fields = [field.strip() for field in log_line.split(',', 2)]
    return (int(fields[0]), int(fields[1]), fields[2])
def analyze_log(log_string: str) -> List[Tuple[str, ...]]:
    """Return up to the 10 most common page triplets in a visit log.

    A triplet is three pages visited consecutively by the same user.
    Records are consumed in the order they appear in ``log_string``; the
    timestamp field is parsed but not used to reorder them.

    Args:
        log_string: Newline-separated ``user_id, timestamp, page`` records.
            Blank lines are skipped.

    Returns:
        A list of at most 10 ``(page, page, page)`` tuples, most frequent
        first. Triplets with equal counts keep the order in which they were
        first seen. The list is empty when no user has three visits.

    Raises:
        ValueError, IndexError: Propagated from ``parse_line`` for a
            malformed non-blank record.
    """
    from collections import Counter, defaultdict, deque

    # Only the last three pages per user are ever needed, so keep a bounded
    # window instead of each user's whole history.
    recent_pages = defaultdict(lambda: deque(maxlen=3))  # {user: last 3 pages}
    triplet_counts = Counter()  # {(page, page, page): count}

    for line in log_string.splitlines():
        if not line.strip():
            continue  # tolerate blank lines rather than failing in int('')
        user, _timestamp, page = parse_line(line)
        window = recent_pages[user]
        window.append(page)
        if len(window) == 3:
            triplet_counts[tuple(window)] += 1

    # most_common() yields [] for an empty counter (no max() on an empty
    # mapping) and orders equal counts by first occurrence.
    return [triplet for triplet, _count in triplet_counts.most_common(10)]
# Demo run: print the most common triplets found in the sample log.
print(analyze_log(log_string=log))
def say_hello():
    """Print the fixed greeting ``Hello, World`` to standard output."""
    greeting = 'Hello, World'
    print(greeting)
# Call say_hello() five times; the loop index itself is not used.
for i in range(5):
    say_hello()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment