Skip to content

Instantly share code, notes, and snippets.

@kittinan
Last active May 4, 2018 09:50
Show Gist options
  • Save kittinan/cb6657bbe1b379dabf2e6429d99b5a52 to your computer and use it in GitHub Desktop.
Save kittinan/cb6657bbe1b379dabf2e6429d99b5a52 to your computer and use it in GitHub Desktop.
Download YouTube audio with multithreading
import pandas as pd
import youtube_dl
import os
from youtube_dl.utils import DownloadError
from concurrent.futures import ThreadPoolExecutor, wait, as_completed
def download_youtube(youtube_id, idx):
    """Download the audio of one YouTube video as an m4a file.

    The output path follows the youtube_dl template
    ``./audio/%(id)s.%(ext)s``. A ``DownloadError`` is printed and
    swallowed so that a single failing video does not abort the rest of
    the batch.

    Args:
        youtube_id: Value passed to ``YoutubeDL.download`` to identify
            the video.
        idx: Index of the video in the caller's table; used only in the
            progress message.

    Returns:
        ``youtube_id``, whether or not the download succeeded.
    """
    ydl_opts = {
        'format': 'm4a',
        'outtmpl': './audio/%(id)s.%(ext)s',
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([youtube_id])
        except DownloadError as e:
            print("skip")
            print(str(e))
        else:
            # Report success only when the download did not fail, so the
            # log does not claim "Done" for a skipped video.
            print("[{}] Done: {}".format(idx, youtube_id))
    return youtube_id
# Load the song table and drop rows that have no value in the 'youtube'
# column; the row count is printed before and after the filter.
df = pd.read_csv('../notebook/new_lyric.csv')
print(len(df))
df.dropna(subset=['youtube'], inplace=True)
print(len(df))

futures = []
# Using the executor as a context manager guarantees the worker threads are
# shut down and joined, even if collecting a result raises.
with ThreadPoolExecutor(4) as pool:
    for idx, row in df.iterrows():
        youtube_id = row['youtube']
        title = row["title"]
        print("[{}] {}: {}".format(idx, title, youtube_id))

        # Skip videos whose audio file is already on disk.
        # NOTE(review): assumes the CSV value equals the id youtube_dl uses
        # in its output file name -- confirm.
        filename = "./audio/{}.m4a".format(youtube_id)
        if os.path.isfile(filename):
            print("{}: Skip file exist".format(youtube_id))
            continue

        futures.append(pool.submit(download_youtube, youtube_id, idx))

    # Report each download as soon as it finishes, in completion order.
    for x in as_completed(futures):
        print("{} completed".format(x.result()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment