Skip to content

Instantly share code, notes, and snippets.

@s-espriz
Created November 5, 2024 13:57
Show Gist options
  • Save s-espriz/c4d5a72ddcaee82ef484ddc1442e1bd6 to your computer and use it in GitHub Desktop.
Save s-espriz/c4d5a72ddcaee82ef484ddc1442e1bd6 to your computer and use it in GitHub Desktop.
This script fetches the news posts and their corresponding reactions from a given Telegram channel.
import datetime
import os
import re

import pandas as pd
from telethon import TelegramClient
# Telegram API credentials -- placeholders, replace them with your own values.
api_hash = 'Your API hash'
api_id = 1111  # Your API ID

# Username of the channel to read.
channel_username = 'Akharinkhabar'

# Start date: 6 October 2023, i.e. one day before 7 October.
initial_date = datetime.datetime(year=2023, month=10, day=6)
def extract_title_remove_water_mark(text, watermark="@Akharinkhabar"):
    """Strip the trailing channel watermark from *text* and pick out its title.

    Args:
        text: Raw message text. ``None`` or an empty string is accepted and
            yields two empty strings.
        watermark: Literal marker that starts the watermark. Everything from
            the marker to the end of the text is dropped. Because ``.`` does
            not match a newline, only a marker on the final line is removed.

    Returns:
        A ``(title, text)`` tuple. ``text`` is the message without the
        watermark and without surrounding whitespace; ``title`` is the first
        non-empty line of that cleaned text (``""`` when there is none).
    """
    if not text:
        return "", ""

    # ``.*`` rather than ``.+``: a message that ends with the bare watermark
    # (nothing after the handle) must be cleaned as well.
    pattern = re.escape(watermark) + r".*$"
    cleaned = re.sub(pattern, "", text).strip()

    # The title is searched in the already-stripped text so it can never be a
    # whitespace-only line or carry whitespace left behind by the removal.
    first_line = re.search(r"^.+$", cleaned, re.MULTILINE)
    title_text = first_line.group().strip() if first_line else ""
    return title_text, cleaned
def _reaction_key(reaction):
    """Return a string key identifying *reaction* in the reaction tally."""
    emoticon = getattr(reaction, "emoticon", None)
    if emoticon is not None:
        return emoticon
    # Reactions without an ``emoticon`` attribute (e.g. custom emoji, which
    # expose ``document_id``) still need a stable key.
    document_id = getattr(reaction, "document_id", None)
    if document_id is not None:
        return f"custom_emoji:{document_id}"
    return type(reaction).__name__


def get_reaction(message):
    """Tally the reactions attached to *message*.

    Args:
        message: Object with a ``reactions`` attribute. When that attribute is
            truthy, its ``results`` is iterated and each entry is read for
            ``count`` and ``reaction``.

    Returns:
        A dict mapping a reaction key to its total count. The key is the
        emoticon when the reaction has one; otherwise ``custom_emoji:<id>``
        or, as a last resort, the reaction's type name. Returns ``{}`` when
        the message has no reactions.
    """
    message_reaction = {}
    if not message.reactions:
        return message_reaction

    for entry in message.reactions.results or ():
        key = _reaction_key(entry.reaction)
        # Counts for the same key are summed rather than overwritten.
        message_reaction[key] = message_reaction.get(key, 0) + entry.count
    return message_reaction
# Build the Telegram client; 'anon' is the session name passed to Telethon.
client = TelegramClient(
    session='anon',
    api_id=api_id,
    api_hash=api_hash,
)
def _append_rows_to_csv(rows, csv_path):
    """Append *rows* (a list of dicts) to *csv_path* as CSV."""
    frame = pd.DataFrame(rows)
    # The header is written only when the file does not exist yet, so later
    # batches appended to the same file do not repeat it.
    frame.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)


async def fetch_messages_to_csv(channel_username, batch_size=100):
    """Dump a channel's messages to ``<channel_username>_messages.csv``.

    Messages are read through the module-level ``client`` with
    ``offset_date=initial_date`` and ``reverse=True``, and are appended to the
    CSV file in batches so that memory use stays bounded.

    Args:
        channel_username: Username of the channel; also used as the prefix of
            every row id and of the output file name.
        batch_size: Number of rows buffered in memory before each append.

    Each row holds: ``id``, ``channel``, ``date``, ``views`` (0 when missing),
    ``reply_to`` (id of the replied-to message, or ``None``), ``title``,
    ``text`` and ``reactions`` (a dict of counts).
    """
    await client.start()
    channel = await client.get_entity(channel_username)
    csv_path = f'{channel_username}_messages.csv'
    messages_data = []

    async for message in client.iter_messages(channel, offset_date=initial_date, reverse=True):
        # Reaction and text parsing are best effort: one odd message must not
        # abort the whole export. ``Exception`` (not a bare ``except``) keeps
        # Ctrl-C and task cancellation working.
        try:
            message_reaction = get_reaction(message=message)
        except Exception:
            message_reaction = {}
        try:
            title, text = extract_title_remove_water_mark(message.raw_text)
        except Exception:
            title, text = "", ""

        # Id of the message being replied to -- not this message's own id.
        # ``getattr`` with a default also covers replies that carry no message
        # id at all.
        replied_to_id = getattr(message, 'reply_to_msg_id', None)

        messages_data.append({
            'id': f"{channel_username}-{message.id}",
            'channel': channel_username,
            'date': message.date,
            'views': message.views if message.views is not None else 0,
            'reply_to': f"{channel_username}-{replied_to_id}" if replied_to_id is not None else None,
            'title': title,
            'text': text,
            'reactions': message_reaction,
        })

        if len(messages_data) >= batch_size:
            _append_rows_to_csv(messages_data, csv_path)
            messages_data = []  # Start the next batch.

    # Save any remaining messages that didn't fill a full batch.
    if messages_data:
        _append_rows_to_csv(messages_data, csv_path)
# Script entry: run the export to completion on the client's event loop.
with client:
    export_job = fetch_messages_to_csv(channel_username, batch_size=100)
    client.loop.run_until_complete(export_job)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment