Last active
June 17, 2019 23:42
-
-
Save udf/d125357a955b93decc1e92a60ba543d6 to your computer and use it in GitHub Desktop.
Script to find your most-used short messages on Telegram (can be used as a base for more useful analysis)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import asyncio
import logging
from collections import defaultdict

from telethon import TelegramClient, utils

# Configure the root logger so that records at INFO level and above are shown.
logging.basicConfig(level=logging.INFO)
async def main(max_length=64):
    """Tally how often each of your short messages appears across all dialogs.

    Starts a Telethon client, opens a takeout session and walks every message
    of every dialog. Each message whose sender is the logged-in account and
    whose text is non-empty and at most ``max_length`` characters long
    increments its entry in the module-level ``message_counts`` mapping.

    Args:
        max_length: Longest text, in characters, that still counts as a
            "short message". The default of 64 is an arbitrary cut-off.
    """
    # NOTE(review): the API id/hash are hard-coded here; consider supplying
    # your own credentials instead of keeping them in the source.
    client = TelegramClient('user', 6, "eb06d4abfb49dc3eeb1aeb98ae0f581e")
    await client.start()
    try:
        my_id = utils.get_peer_id(await client.get_me())

        async with client.takeout(
            finalize=False, users=True, chats=True, megagroups=True
        ) as takeout:
            async for d in takeout.iter_dialogs():
                print(f'downloading {d.name}...')
                num_counted = 0
                num_processed = 0
                async for m in takeout.iter_messages(d, wait_time=0):
                    # Refresh the progress line only every 10th message; the
                    # leading '\r' overwrites the previous progress output.
                    if num_processed % 10 == 0:
                        print(
                            f'\rprocessed {num_processed} '
                            f'({num_counted} counted) messages',
                            end='',
                        )
                    num_processed += 1

                    # sender_id is compared as a plain integer id.
                    # NOTE(review): newer Telethon releases are understood to
                    # return a Peer object from from_id, which would never
                    # equal my_id -- confirm against the installed version.
                    if m.sender_id != my_id:
                        continue
                    if not m.message:
                        continue
                    if len(m.message) > max_length:
                        continue

                    message_counts[m.message] += 1
                    num_counted += 1

                # Show the final tally (the in-loop line can lag by up to nine
                # messages) and terminate the progress line.
                print(
                    f'\rprocessed {num_processed} '
                    f'({num_counted} counted) messages'
                )
    finally:
        # Close the connection even when the scan fails part-way through.
        await client.disconnect()
# Shared tally (message text -> number of times sent); main() fills it in.
message_counts = defaultdict(int)
asyncio.run(main())

# json.dump escapes non-ASCII characters by default, so the file content is
# plain ASCII; the explicit encoding just keeps the write platform-independent.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(message_counts, f)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json


def top_messages(counts, limit=100):
    """Return the most frequent messages as ``(message, count)`` pairs.

    Messages are ranked by descending count (ties keep the mapping's own
    order), the first ``limit`` entries are kept, and any entry among those
    that occurred exactly once is then dropped.

    Args:
        counts: Mapping of message text to the number of times it was sent.
        limit: How many top-ranked entries to consider before filtering.

    Returns:
        A list of ``(message, count)`` tuples, most frequent first.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return [(msg, n) for msg, n in ranked[:limit] if n != 1]


def print_top_messages(path='data.json'):
    """Load the counts stored at ``path`` and print the top messages."""
    with open(path, encoding='utf-8') as f:
        counts = json.load(f)

    # print the top 100 messages
    print('\n'.join(f'{n} {msg}' for msg, n in top_messages(counts)))


if __name__ == '__main__':
    print_top_messages()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment