Last active
January 13, 2022 13:26
-
-
Save lschwetlick/c3075ac483f115d851026b3c7de4f07b to your computer and use it in GitHub Desktop.
Download Music from a Telegram Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Download Music from a Telegram Conversation | |
- youtube-dl is available at https://github.com/ytdl-org
- Telegram data can be exported from the desktop app via Preferences -> Advanced -> Export; the script needs the JSON export (result.json)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# dl_link.sh - extract the audio track of a single video as MP3 via youtube-dl.
#
# Usage: dl_link.sh URL
#
# Only POSIX shell features are used, so the script also works when it is
# started as `sh ./dl_link.sh URL`.
if [ "$#" -lt 1 ]; then
    echo "usage: $0 URL" >&2
    exit 2
fi
# "$1" is quoted so that characters such as ? and * in the URL are not
# glob-expanded or word-split by the shell.  The "--" ends option parsing so
# that a link beginning with "-" cannot be interpreted as a youtube-dl option.
youtube-dl -x --audio-format mp3 --audio-quality 0 --no-playlist --ignore-errors --restrict-filenames -- "$1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to download all the youtube videos in a chat | |
Requires | |
- youtube-dl | |
- json of telegram data | |
""" | |
import os | |
import subprocess | |
import json | |
import numpy as np | |
import pandas as pd | |
import datetime | |
import json | |
from tqdm import tqdm | |
# SETUP
# Location of the result.json file written by the Telegram Desktop export.
DATA = (
    "/Users/lisa/Downloads/Telegram Desktop/"
    "DataExport_2022-01-13/result.json"
)
# Name of the conversation whose links are downloaded.
CONV = "Saturday’s songs"
# FUNCTIONS | |
def get_chat_names(chats):
    """
    Get the names of all conversations

    chats: list of chat dicts; each needs a "name" key or an "id" key.

    Returns a dict that maps the chat name (or the chat id when the chat has
    no "name" key) to the position of that chat in `chats`. When two chats
    share a key, the later one wins.

    Raises KeyError when a chat has neither "name" nor "id".
    """
    contact_names = {}
    for i, c in enumerate(chats):
        try:
            key = c["name"]
        except KeyError:
            # Chats without a name are indexed by their id instead.
            key = c["id"]
        contact_names[key] = i
    return contact_names
def get_msgs_contact(contact_names, name, chats=None):
    """
    Get all the messages within one conversation

    contact_names: dict mapping a conversation name to its index in `chats`.
    name: key of the wanted conversation in `contact_names`.
    chats: list of chat dicts, each holding a "messages" entry. Optional for
        backward compatibility: when omitted, the module-level `chats`
        variable is used, which only exists once the script's entry point
        has loaded the export.

    Returns the value stored under "messages" for the selected chat.

    Raises KeyError when `name` is not in `contact_names`, or when `chats` is
    omitted and no module-level `chats` has been defined.
    """
    if chats is None:
        # Older call sites pass only two arguments and rely on the global.
        chats = globals()["chats"]
    ix = contact_names[name]
    return chats[ix]["messages"]
def get_contact_df(chats, contact):
    """
    Get conversation as data frame

    chats: list of chat dicts (each with a "messages" list of message dicts).
    contact: key of the conversation as produced by get_chat_names.

    Returns a DataFrame with one row per entry whose "type" is "message" and
    the columns index, id, type, date, sender, text, sticker_emoji, photo,
    edited and ts. "index" is the row position before filtering, "sender" is
    the message's "from" field, "date" is parsed to datetime and "ts" is the
    date as whole seconds since 1970-01-01.

    Raises KeyError when `contact` is not a known conversation.
    """
    columns = ["id", "type", "date", "sender", "text", "sticker_emoji", "photo", "edited"]
    contact_names = get_chat_names(chats)
    # Look the conversation up in the list that was passed in, so the result
    # does not depend on a module-level variable of the same name.
    contact_msgs = chats[contact_names[contact]]["messages"]
    df = pd.DataFrame(contact_msgs)
    # Rename exactly the "from" column; a substring replace would also
    # rewrite other column names that merely contain "from".
    df = df.rename(columns={"from": "sender"})
    for c in columns:
        if c not in df:
            # Columns missing from this export are added empty so the
            # selection below always succeeds.
            df[c] = None
    df = df[columns]
    df = df[df["type"] == "message"]
    # reset_index keeps the pre-filter row position as an "index" column.
    df = df.reset_index()
    df["date"] = pd.to_datetime(df["date"])
    # Convert through datetime64[s] so the result is in seconds regardless of
    # the resolution (ns, us, ...) that pandas chose for the parsed dates.
    df["ts"] = df["date"].values.astype("datetime64[s]").astype(np.int64)
    return df
def get_links(df):
    """
    Get all links as a list

    df: object whose "text" entry iterates over message texts (for example
        the data frame built from a conversation).

    Only texts that are lists are inspected; within them, every dict part
    whose "type" is "link" contributes its "text" value. Everything else
    (non-list texts, non-dict parts, parts of another or missing type) is
    skipped.

    Returns the link texts in message order.
    """
    links = []
    for msg in df["text"]:
        if not isinstance(msg, list):
            continue
        for part in msg:
            # .get() so that a part without a "type" key is skipped instead
            # of aborting the whole scan.
            if isinstance(part, dict) and part.get("type") == "link":
                links.append(part["text"])
    return links
def download(links):
    """
    Run ./dl_link.sh once per link, showing a progress bar

    links: iterable of link strings; each is passed as the single argument
        of the helper script, which is resolved relative to the current
        working directory.

    The loop is best effort: a link that cannot be processed is reported
    with a "failed for" message and the remaining links are still tried.
    Returns None.
    """
    for link in tqdm(links):
        print("Downloading ", link)
        try:
            res = subprocess.run(['sh', './dl_link.sh', link], capture_output=True)
        except Exception as err:
            # Per-link boundary: report and move on. Unlike a bare except
            # this lets KeyboardInterrupt through, so Ctrl-C stops the batch.
            print("failed for ", link, err)
            continue
        # subprocess.run does not raise on a non-zero exit status, so a
        # failed download has to be detected from the return code.
        if res.returncode != 0:
            print("failed for ", link)
if __name__ == "__main__":
    # load telegram data
    # The encoding is given explicitly so that non-ASCII chat names and
    # message texts are read correctly whatever the platform default is.
    with open(DATA, encoding="utf-8") as f:
        data = json.load(f)
    # Keep `chats` a module-level name: get_msgs_contact can read it as a
    # global.
    chats = data["chats"]["list"]
    # find correct conversation
    df = get_contact_df(chats, CONV)
    links = get_links(df)
    # download all the links
    download(links)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment