Created
August 29, 2018 18:14
-
-
Save varunvora/39e1dcad2bef5aea466103939816678a to your computer and use it in GitHub Desktop.
WhatsApp Group Chat Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Copy the conversation and provide it on stdin.
This program counts the number of messages and the total length of all messages
sent by each person in the conversation.
""" | |
from sys import stdin | |
import re | |
# Matches the "[time, date]" stamp that opens a new message, for example
# "[17:06, 8/7/2018]".  It is anchored at the start of the line and stops at
# the first "]", so it works for any date and never swallows brackets that
# appear later in the message text.
message_timestamp = re.compile(r'^\[\d[^\]]*\]')

# Indices into each participant's [message count, character count] pair.
MESSAGES = 0
CHARACTERS = 1


def count_messages(lines):
    """Tally messages and characters per sender.

    A line that begins with a bracketed timestamp starts a new message of the
    form "[stamp] Sender: text".  Any other line is treated as a continuation
    of the most recent message and its characters (including its line ending)
    are added to that sender's total.  Lines that appear before the first
    timestamped line have no sender and are ignored.

    Args:
        lines: iterable of text lines from the copied conversation.

    Returns:
        dict mapping sender name to a two-item list indexed by MESSAGES and
        CHARACTERS.
    """
    participants = dict()
    user = None
    for line in lines:
        header = message_timestamp.match(line)
        if header:
            # Split on the first colon only: the message itself may contain
            # colons, which must stay part of the text.
            remainder = line[header.end():].strip()
            sender, _, text = remainder.partition(':')
            user = sender.strip()
            # The space that separates "Sender:" from the text is not part
            # of the message, so it is not counted.
            text = text.lstrip()
            if user not in participants:
                participants[user] = [0, 0]
            participants[user][MESSAGES] += 1
        elif user is None:
            # Nothing to attribute this text to yet.
            continue
        else:
            text = line
        participants[user][CHARACTERS] += len(text)
    return participants


def main():
    """Read a conversation from stdin and print per-sender totals.

    Rows are ordered by message count, highest first.
    """
    participants = count_messages(stdin)
    print('Contact', 'Messages', 'Characters', sep='\t\t')
    ranked = sorted(
        participants,
        key=lambda name: participants[name][MESSAGES],
        reverse=True,
    )
    for name in ranked:
        print(name, *participants[name], sep='\t\t')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment