Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save lschwetlick/c3075ac483f115d851026b3c7de4f07b to your computer and use it in GitHub Desktop.
Save lschwetlick/c3075ac483f115d851026b3c7de4f07b to your computer and use it in GitHub Desktop.
Download Music from a Telegram Conversation
Download Music from a Telegram Conversation
- youtube-dl is available at https://github.com/ytdl-org
- Telegram data can be exported from the desktop app via Preferences -> Advanced -> Export
"""
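Script to download everything linked in a Telegram chat (e.g. YouTube videos)
Requires
- youtube-dl
- the JSON export of the Telegram data
- a dl_link.sh script in the working directory (it is run once per link)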
"""
import os
import subprocess
import json
import numpy as np
import pandas as pd
import datetime
import json
from tqdm import tqdm
# SETUP
# Path to the JSON file that is opened and parsed when the script runs.
DATA = '/Users/lisa/Downloads/Telegram Desktop/DataExport_2022-01-13/result.json'
# Conversation to process; it is matched against each chat's "name" entry
# (note the typographic apostrophe, which must match the export exactly).
CONV = "Saturday’s songs"
# FUNCTIONS
def get_chat_names(chats):
    """
    Get the names of all conversations

    chats: iterable of dict-like chat entries.

    Returns a dict mapping each chat's "name" value (or its "id" value when
    the entry has no "name" key) to the position of that chat in `chats`.
    When several chats share the same key, the last position wins.

    Raises KeyError if an entry has neither a "name" nor an "id" key.
    """
    contact_names = {}
    for i, c in enumerate(chats):
        try:
            key = c["name"]
        except KeyError:
            # Only a missing "name" key triggers the fallback; anything else
            # (including Ctrl-C) is allowed to propagate instead of being
            # swallowed by a bare except.
            key = c["id"]
        contact_names[key] = i
    return contact_names
def get_msgs_contact(contact_names, name, chats=None):
    """
    Get all the messages within one conversation

    contact_names: mapping from conversation name to its position in `chats`.
    name: key to look up in `contact_names`.
    chats: sequence of chat entries, each holding a "messages" entry. When
        omitted, the module-level `chats` variable is used, which keeps
        two-argument calls working.

    Returns the "messages" value of the selected chat.

    Raises KeyError if `name` is not in `contact_names`, and NameError if
    `chats` is omitted and no module-level `chats` exists.
    """
    ix = contact_names[name]
    if chats is None:
        # Backward compatibility: this function used to read a global that
        # only exists when the file is run as a script.
        try:
            chats = globals()["chats"]
        except KeyError:
            raise NameError("name 'chats' is not defined") from None
    return chats[ix]["messages"]
def get_contact_df(chats, contact):
    """
    Get conversation as data frame

    chats: sequence of chat entries, each holding a "messages" entry.
    contact: name of the conversation, as produced by get_chat_names.

    Returns a DataFrame restricted to rows whose "type" is "message", with
    the columns "index" (row position before filtering), "id", "type",
    "date" (converted with pd.to_datetime), "sender" (the "from" field of
    the message), "text", "sticker_emoji", "photo", "edited" and "ts"
    (the date as integer seconds). Columns missing from the messages are
    filled with None.

    Raises KeyError if `contact` is not a known conversation.
    """
    columns = ["id", "type", "date", "sender", "text", "sticker_emoji", "photo", "edited"]
    contact_names = get_chat_names(chats)
    # Index the `chats` argument directly so the result does not depend on a
    # module-level variable of the same name.
    contact_msgs = chats[contact_names[contact]]["messages"]
    df = pd.DataFrame(contact_msgs)
    # Rename exactly the "from" column; a substring replacement would also
    # rewrite any other column name that merely contains "from".
    df = df.rename(columns={"from": "sender"})
    for c in columns:
        if c not in df:
            df[c] = None
    df = df[columns]
    df = df[df["type"] == "message"]
    df = df.reset_index()
    df["date"] = pd.to_datetime(df["date"])
    # Force nanosecond resolution before the integer cast so that dividing by
    # 10**9 yields seconds whatever resolution pandas chose for the column.
    nanoseconds = df["date"].values.astype("datetime64[ns]").astype(np.int64)
    df["ts"] = nanoseconds // 10 ** 9
    return df
def get_links(df):
    """
    Get all links as a list

    df: object whose "text" entry is an iterable of message texts. Only
        texts that are lists are inspected; within those, only dict parts
        whose "type" is "link" contribute.

    Returns the "text" value of every such link part, in message order.
    """
    links = []
    for msg in df["text"]:
        if not isinstance(msg, list):
            # Texts that are not lists carry no structured parts to inspect.
            continue
        for part in msg:
            # .get() skips dict parts without a "type" key instead of
            # aborting the whole scan with a KeyError.
            if isinstance(part, dict) and part.get("type") == "link":
                links.append(part["text"])
    return links
def download(links):
    """
    Run ./dl_link.sh once for every link, showing a progress bar

    links: iterable of links; each is passed as the single argument to
        `sh ./dl_link.sh`.

    Failures are reported on stdout and do not stop the remaining downloads.
    Returns None.
    """
    for link in tqdm(links):
        print("Downloading ", link)
        try:
            res = subprocess.run(['sh', './dl_link.sh', link], capture_output=True)
        except Exception as err:
            # Best effort per link: report and carry on. Unlike a bare
            # except, this lets KeyboardInterrupt stop the whole run.
            print("failed for ", link)
            print(err)
            continue
        # subprocess.run does not raise on a non-zero exit status, so a
        # failing script has to be detected explicitly.
        if res.returncode != 0:
            print("failed for ", link)
            details = res.stderr.decode(errors="replace").strip()
            if details:
                print(details)
if __name__ == "__main__":
    # load telegram data
    # The export is JSON and the conversation name contains non-ASCII
    # characters, so decode as UTF-8 rather than the platform default.
    with open(DATA, encoding="utf-8") as f:
        data = json.load(f)
    # `chats` is deliberately a module-level name: get_msgs_contact may look
    # it up globally.
    chats = data["chats"]["list"]
    # find correct conversation
    df = get_contact_df(chats, CONV)
    links = get_links(df)
    # download all the links
    download(links)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment