Created
October 2, 2019 10:47
-
-
Save eioo/48b22ecdad29ce112c51a80d6c20ce8f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import json

# Name of the chat to analyse and of the sender whose words are counted.
# Both are empty in this copy; presumably they are meant to be filled in
# before the script is run.
chat_name = ''
from_name = ''

# Read the whole export file as UTF-8 text, then parse it as JSON.
with open('result.json', 'r', encoding='utf8') as f:
    content = f.read()

data = json.loads(content)

# The list of chat objects stored under data['chats']['list'].
chats = data['chats']['list']
def get_chat(chat_name):
    """Return the first chat in the module-level ``chats`` list named *chat_name*.

    Args:
        chat_name: Value compared for equality against each chat's 'name'.

    Returns:
        The matching chat object, or ``None`` when no chat has that name.
        Entries that have no 'name' key are skipped rather than raising
        ``KeyError``.
    """
    # ``chats`` is only read here, so no ``global`` declaration is needed.
    for chat in chats:
        if 'name' in chat and chat['name'] == chat_name:
            return chat
    return None
def get_user_messages(chat_name, user):
    """Return the messages of chat *chat_name* sent by *user* that have text.

    A message is kept when it has a 'from' field equal to *user* and a
    truthy 'text' field. Messages lacking either key are skipped.

    Args:
        chat_name: Name of the chat, resolved through ``get_chat``.
        user: Value compared for equality against each message's 'from'.

    Returns:
        A list of the matching message objects, in their original order.

    Raises:
        ValueError: If ``get_chat`` returns ``None`` for *chat_name*.
    """
    chat = get_chat(chat_name)
    if chat is None:
        # Fail with a clear message instead of a TypeError on None['messages'].
        raise ValueError(f'chat not found: {chat_name!r}')
    return [
        msg
        for msg in chat['messages']
        if 'from' in msg and msg['from'] == user and msg.get('text')
    ]
# Collect the lower-cased words of the selected user's messages.
messages = get_user_messages(chat_name, from_name)
words = []

for message in messages:
    text = message['text']
    # Skip empty text, text that is not a plain string, and text that
    # starts with '/'.
    if not text or not isinstance(text, str) or text.startswith('/'):
        continue
    # Split on whitespace; single-character pieces are not counted.
    words.extend(piece for piece in text.lower().split() if len(piece) != 1)

freq = collections.Counter(words)

# Stable ascending sort by count, then reversed: highest counts come first
# and words with equal counts appear in reverse order of first occurrence.
sorted_freq = sorted(freq.items(), key=lambda kv: kv[1])[::-1]

# Print at most 500 ranked rows. The earlier manual counter tested
# `i > 500` only after incrementing, which let a 501st row through.
for i, (key, value) in enumerate(sorted_freq[:500], start=1):
    index = str(i) + "."
    print(f'{index.ljust(5)} {key.ljust(20, " ")} {value}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment