Skip to content

Instantly share code, notes, and snippets.

@udf
Last active June 17, 2019 23:42
Show Gist options
  • Save udf/d125357a955b93decc1e92a60ba543d6 to your computer and use it in GitHub Desktop.
Save udf/d125357a955b93decc1e92a60ba543d6 to your computer and use it in GitHub Desktop.
Script to find your most used short messages on Telegram (can be used as a base to do more useful analysis)
import json
import asyncio
import logging
from collections import defaultdict
from telethon import TelegramClient, utils
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
async def main(max_length=64):
    """Tally the short text messages sent by the logged-in account.

    Starts a Telegram client, opens a takeout session and iterates over every
    dialog and every message in it. For each message whose sender is the
    current account and whose text is non-empty and at most ``max_length``
    characters long, the module-level ``message_counts`` mapping is
    incremented under that text. Progress is printed to stdout.

    Args:
        max_length: Longest message text (in characters) that still counts
            as a "short message". The default of 64 is an arbitrary cutoff.
    """
    # Positional arguments are the session name, API id and API hash.
    client = TelegramClient('user', 6, "eb06d4abfb49dc3eeb1aeb98ae0f581e")
    await client.start()
    try:
        my_id = utils.get_peer_id(await client.get_me())

        async with client.takeout(
            finalize=False, users=True, chats=True, megagroups=True
        ) as takeout:
            async for d in takeout.iter_dialogs():
                print(f'downloading {d.name}...')
                num_counted = 0
                num_processed = 0
                # NOTE(review): wait_time=0 presumably removes the pause
                # between history requests -- confirm against Telethon docs.
                async for m in takeout.iter_messages(d, wait_time=0):
                    # Only refresh the progress line every tenth message.
                    if num_processed % 10 == 0:
                        print(f'\rprocessed {num_processed} ({num_counted} counted) messsages', end='')
                    num_processed += 1

                    # sender_id is an integer id, so it is comparable with
                    # my_id; from_id may be a Peer object in newer Telethon.
                    if m.sender_id != my_id:
                        continue
                    text = m.message
                    # Skip messages without text and ones that are too long.
                    if not text or len(text) > max_length:
                        continue

                    message_counts[text] += 1
                    num_counted += 1
                # Terminate the '\r'-rewritten progress line for this dialog.
                print()
    finally:
        # Always release the connection, even if the takeout fails midway.
        await client.disconnect()
# Shared tally that main() fills in: message text -> number of occurrences.
# It has to exist before the coroutine is run.
message_counts = defaultdict(int)
asyncio.run(main())

# Persist the collected counts as JSON so they can be analysed separately.
with open('data.json', mode='w') as f:
    f.write(json.dumps(message_counts))
import json
# Load the message -> count mapping stored in data.json.
with open('data.json') as f:
    counts = json.load(f)

# Message texts ordered from most to least frequent.
sorted_msgs = sorted(counts, key=lambda m: counts[m], reverse=True)

# print the top 100 messages (messages that occurred only once are left out)
top_msgs = sorted_msgs[:100]
print('\n'.join(f'{counts[m]} {m}' for m in top_msgs if counts[m] != 1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment