alexeygrigorev/vimeo-download.py

kbabanov · 2025-01-27T04:55:07Z

Here is a new version, which supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation and multithreading (thanks to @Javi3rV). I have not tested all conditional flows, but it should work. If you find a bug or anything else, I can fix it later.

import importlib.metadata
import subprocess
import sys

# Third-party packages the rest of the script imports.
required = {'requests', 'tqdm', 'moviepy'}

# Names of every distribution visible to the running interpreter.
installed = set()
for pkg in importlib.metadata.distributions():
    installed.add(pkg.metadata['Name'])

missing = required.difference(installed)

if missing:
    # Run pip through the current interpreter so packages land in the
    # environment this script is executing in; pip itself is upgraded first.
    _pip_install = [sys.executable, '-m', 'pip', 'install']
    subprocess.check_call(_pip_install + ['--upgrade', 'pip'])
    subprocess.check_call(_pip_install + list(missing))


import os
import base64
import requests
from shutil import which
from tqdm import tqdm
from random import choice
from string import ascii_lowercase
from concurrent.futures import ThreadPoolExecutor

# Which external command-line tools are reachable through PATH.
has_ffmpeg = which('ffmpeg') is not None
has_youtube_dl = which('youtube-dl') is not None
has_yt_dlp = which('yt-dlp') is not None

# True only when the pre-2.0 moviepy API (moviepy.editor) was imported.
moviepy_deprecated = False

# moviepy is only imported when ffmpeg is not available on PATH.
if not has_ffmpeg:
    try:
        from moviepy.editor import *  # before 2.0, deprecated
    except ImportError:
        from moviepy import *  # after 2.0
    else:
        moviepy_deprecated = True

# Ask for the manifest URL and the output file name (no extension).
url = input('enter [master|playlist].json url: ')
name = input('enter output name: ')

if 'master.json' in url:
    # master.json links are handed to youtube-dl / yt-dlp as a .mpd URL.
    # partition() leaves the URL intact when it has no query string, whereas
    # slicing with find('?') == -1 would silently drop the last character and
    # prevent the 'master.json' replacement below from matching.
    url = url.partition('?')[0] + '?query_string_ranges=1'
    url = url.replace('master.json', 'master.mpd')
    print(url)

    # youtube-dl is tried first, then yt-dlp. The downloader's own exit
    # status is propagated so a failed download is not reported as success.
    if has_youtube_dl:
        sys.exit(subprocess.run(['youtube-dl', url, '-o', name]).returncode)

    if has_yt_dlp:
        sys.exit(subprocess.run(['yt-dlp', url, '-o', name]).returncode)

    print('you should have youtube-dl or yt-dlp in your PATH to download master.json like links')
    sys.exit(1)


def download_segment(segment_url, segment_path):
    """Download a single segment and store its body in a file.

    Args:
        segment_url: URL of the segment to fetch.
        segment_path: Path of the file the response body is written to.

    Returns:
        None. When the server answers with anything other than 200, a
        diagnostic and the URL are printed and no file is created.
    """
    # The context manager releases the connection even if writing fails; the
    # (connect, read) timeout keeps a stalled server from blocking a worker
    # thread forever.
    with requests.get(segment_url, stream=True, timeout=(10, 60)) as resp:
        if resp.status_code != 200:
            print('not 200!')
            print(segment_url)
            return
        with open(segment_path, 'wb') as segment_file:
            # Iterating the response object directly yields 128-byte chunks;
            # a larger chunk size avoids thousands of tiny writes.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                segment_file.write(chunk)


def download(what, to, base):
    """Download every segment of one stream and concatenate them into a file.

    Args:
        what: Stream description; the code reads ``what['mime_type']``,
            ``what['init_segment']`` (base64 text) and ``what['segments']``
            (an iterable of mappings with a ``'url'`` entry).
        to: Path of the output file; it is created or overwritten.
        base: Prefix prepended to every segment ``'url'``.

    Raises:
        FileNotFoundError: If a segment file does not exist when the output
            is assembled.
    """
    # Local import: the module only imports ``which`` from shutil.
    from shutil import copyfileobj

    print('saving', what['mime_type'], 'to', to)
    init_segment = base64.b64decode(what['init_segment'])

    # suffix for support multiple downloads in same folder
    segment_suffix = ''.join(choice(ascii_lowercase) for _ in range(20)) + '_'

    segment_urls = [base + segment['url'] for segment in what['segments']]
    segment_paths = [f"segment_{i}_" + segment_suffix + ".tmp" for i in range(len(segment_urls))]

    try:
        # Segments are fetched concurrently; list() drains the lazy map so
        # tqdm advances and worker exceptions surface here.
        with ThreadPoolExecutor(max_workers=15) as executor:
            list(tqdm(executor.map(download_segment, segment_urls, segment_paths), total=len(segment_urls)))

        with open(to, 'wb') as file:
            file.write(init_segment)
            for segment_path in segment_paths:
                with open(segment_path, 'rb') as segment_file:
                    # Stream the copy instead of loading the whole segment.
                    copyfileobj(segment_file, file)
                os.remove(segment_path)
    finally:
        # On failure, do not leave the remaining temporary segment files
        # behind; after a successful run none of them exist any more.
        for segment_path in segment_paths:
            if os.path.exists(segment_path):
                os.remove(segment_path)

    print('done')


name += '.mp4'
# NOTE(review): the -26 offset matches the length of a trailing
# '/master.json?base64_init=1'; confirm it is also right for the
# playlist.json URLs this script is used with.
base_url = url[:url.rfind('/', 0, -26) + 1]
# Timeout so an unresponsive server cannot hang the script indefinitely.
response = requests.get(url, timeout=30)
if response.status_code >= 400:
    print('error: cant get url content, test your link in browser, code=', response.status_code, '\ncontent:\n', response.content)
    sys.exit(1)

content = response.json()

# Without at least one video entry max() below would raise ValueError.
if not content.get('video'):
    print('error: no video streams found in the json')
    sys.exit(1)

# Choose the video entry with the greatest height.
vid_heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
vid_idx, _ = max(vid_heights, key=lambda _h: _h[1])

# .get() treats a missing 'audio' key the same as an empty list instead of
# raising KeyError.
audio_present = bool(content.get('audio'))

# Choose the audio entry with the highest bitrate, when there is any audio.
audio_quality = None
audio_idx = None
if audio_present:
    audio_quality = [(i, d['bitrate']) for (i, d) in enumerate(content['audio'])]
    audio_idx, _ = max(audio_quality, key=lambda _h: _h[1])

base_url = base_url + content['base_url']

# Random prefix so that several downloads can run in the same folder.
files_prefix = ''.join([choice(ascii_lowercase) for _ in range(20)]) + '_'

# The video track is always downloaded.
video_tmp_file = f'{files_prefix}video.mp4'
video = content['video'][vid_idx]
download(video, video_tmp_file, base_url + video['base_url'])

audio_tmp_file = None
if audio_present:
    audio_tmp_file = f'{files_prefix}audio.mp4'
    audio = content['audio'][audio_idx]
    download(audio, audio_tmp_file, base_url + audio['base_url'])
else:
    # No audio track: the downloaded video file is the final result.
    os.rename(video_tmp_file, name)
    sys.exit(0)

if has_ffmpeg:
    # Merge both tracks with ffmpeg, copying the streams as they are.
    result = subprocess.run(['ffmpeg', '-i', video_tmp_file, '-i', audio_tmp_file, '-c:v', 'copy', '-c:a', 'copy', name])
    if result.returncode != 0:
        # Keep the downloaded tracks so the merge can be retried by hand
        # instead of deleting the only copy of the data.
        print('error: ffmpeg failed, code=', result.returncode, 'temporary files kept:', video_tmp_file, audio_tmp_file)
        sys.exit(result.returncode)
    os.remove(video_tmp_file)
    os.remove(audio_tmp_file)
    sys.exit(0)

# Fallback without ffmpeg on PATH: merge the tracks with moviepy.
video_clip = VideoFileClip(video_tmp_file)
audio_clip = AudioFileClip(audio_tmp_file)

try:
    final_clip = None
    if moviepy_deprecated:
        final_clip = video_clip.set_audio(audio_clip)  # moviepy < 2.0 API
    else:
        final_clip = video_clip.with_audio(audio_clip)  # moviepy >= 2.0 API

    final_clip.write_videofile(name)
finally:
    # Close the source clips before deleting their files; open handles
    # would make os.remove fail on platforms that lock files in use.
    audio_clip.close()
    video_clip.close()

os.remove(video_tmp_file)
os.remove(audio_tmp_file)

kbabanov · 2025-01-28T09:23:47Z

Just use a virtual environment ;) See details here: https://docs.python.org/3/library/venv.html

On Tue, 28 Jan 2025 at 03:44, Pablo ***@***.***> wrote:

…

***@***.**** commented on this gist.

> Here is a new version, which supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation and multithreading (thanks to @Javi3rV <https://github.com/Javi3rV>). I have not tested all conditional flows, but it should work. If you find a bug or anything else, I can fix it later.

Thanks for this! How can I run it on Ubuntu 24.04, where we can't use pip without a virtual environment?

— Reply to this email directly, view it on GitHub <https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd#gistcomment-5411423> or unsubscribe <https://github.com/notifications/unsubscribe-auth/AFZ5IDPGDXYHWUWR5ZJ4PFL2M3HGVBFKMF2HI4TJMJ2XIZLTSKBKK5TBNR2WLJDUOJ2WLJDOMFWWLO3UNBZGKYLEL5YGC4TUNFRWS4DBNZ2F6YLDORUXM2LUPGBKK5TBNR2WLJDHNFZXJJDOMFWWLK3UNBZGKYLEL52HS4DFVRZXKYTKMVRXIX3UPFYGLK2HNFZXIQ3PNVWWK3TUUZ2G64DJMNZZDAVEOR4XAZNEM5UXG5FFOZQWY5LFVA2DAMBQGUZTKNVHORZGSZ3HMVZKMY3SMVQXIZI>. You are receiving this email because you commented on the thread. Triage notifications on the go with GitHub Mobile for iOS <https://apps.apple.com/app/apple-store/id1477376905?ct=notification-email&mt=8&pt=524675> or Android <https://play.google.com/store/apps/details?id=com.github.android&referrer=utm_campaign%3Dnotification-email%26utm_medium%3Demail%26utm_source%3Dgithub>.

ArnyminerZ · 2025-01-29T16:44:36Z

@kbabanov This worked like a charm. Thank you so much

davidecavestro · 2025-01-31T07:17:00Z

> Here is a new version, which supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation and multithreading (thanks to @Javi3rV). I have not tested all conditional flows, but it should work. If you find a bug or anything else, I can fix it later.

I just updated the container image at https://github.com/davidecavestro/vimeo-dl
Is there any video URL that does not expire, so that I can add an automated test to the build?
EDIT: I could also use it to trigger deps installation at build time
EDIT2: I leveraged automatic deps installation passing a fake url

Javi3rV · 2025-01-31T09:20:04Z

I didn't try this, but maybe downloading the json file and using it to test?
We know the json url changes but I'm not sure about the json contents

Edit: as an idea, in my personal script I added the possibility to pass more than one URL as a list of dataclasses (url, outputName). It then just iterates over the list and downloads them one by one.
I didn't share it because it was just a personal preference, but it can easily be done in @kbabanov's script as well. I also thought about using multithreading to download all of them like there is no tomorrow, but Vimeo would take a look at the net traffic and would suspect something, lol.

kbabanov · 2025-01-31T10:45:43Z

@Javi3rV :

I also thought about using multithreading and download all of them like there is no tomorrow but vimeo would take a look at net traffic and would suspect something

If so, you can tweak the number of workers in this line:

with ThreadPoolExecutor(max_workers=15) as executor:

and, if you want, set it to 1 to disable multithreading entirely.
I also thought about the ability to download multiple URLs, and maybe I will come up with a solution a bit later.

davidecavestro · 2025-01-31T14:06:46Z

I find it useful to have a way to avoid asking for user input, so that the whole thing can be easily scripted.
It's often just a matter of supporting env vars such as

# Environment variables win; the prompt is used only when one is unset or empty.
url = os.getenv("SRC_URL") or input('enter [master|playlist].json url: ')
name = os.getenv("OUT_FILE") or input('enter output name: ')
# Clamp to 1..15: ThreadPoolExecutor raises ValueError for max_workers <= 0.
max_workers = max(1, min(int(os.getenv("MAX_WORKERS", 5)), 15))

or/and if you prefer the launch args could be parsed.

Anyway IMHO multithreading is a different matter: as too many simultaneous requests from the same IP are a PITA, I consider a simple loop safer.

I'll check for using json contents for tests.

	import requests
	import base64
	from tqdm import tqdm


	# Ported to Python 3 syntax with the loop indentation restored.
	master_json_url = 'https://178skyfiregce-a.akamaihd.net/exp=1474107106~acl=%2F142089577%2F%2A~hmac=0d9becc441fc5385462d53bf59cf019c0184690862f49b414e9a2f1c5bafbe0d/142089577/video/426274424,426274425,426274423,426274422/master.json?base64_init=1'
	# The -26 offset is the length of the trailing '/master.json?base64_init=1',
	# so the slice ends right after the '/' preceding the last directory.
	base_url = master_json_url[:master_json_url.rfind('/', 0, -26) + 1]

	resp = requests.get(master_json_url)
	content = resp.json()

	# Pick the video entry with the greatest height.
	heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
	idx, _ = max(heights, key=lambda pair: pair[1])
	video = content['video'][idx]
	video_base_url = base_url + video['base_url']
	print('base url:', video_base_url)

	filename = 'video_%d.mp4' % video['id']
	print('saving to %s' % filename)

	# The with-block flushes and closes the file even if a request fails.
	with open(filename, 'wb') as video_file:
	    init_segment = base64.b64decode(video['init_segment'])
	    video_file.write(init_segment)

	    for segment in tqdm(video['segments']):
	        segment_url = video_base_url + segment['url']
	        resp = requests.get(segment_url, stream=True)
	        if resp.status_code != 200:
	            # Stop at the first failed segment; what was written so far is kept.
	            print('not 200!')
	            print(resp)
	            print(segment_url)
	            break
	        for chunk in resp:
	            video_file.write(chunk)

alexeygrigorev/vimeo-download.py

kbabanov commented Jan 27, 2025

Uh oh!

kbabanov commented Jan 28, 2025 via email

Uh oh!

ArnyminerZ commented Jan 29, 2025

Uh oh!

davidecavestro commented Jan 31, 2025 •

edited

Loading

Uh oh!

Javi3rV commented Jan 31, 2025 •

edited

Loading

Uh oh!

kbabanov commented Jan 31, 2025

Uh oh!

davidecavestro commented Jan 31, 2025 •

edited

Loading

Uh oh!

alexeygrigorev/vimeo-download.py

kbabanov commented Jan 27, 2025

Uh oh!

kbabanov commented Jan 28, 2025 via email

Uh oh!

ArnyminerZ commented Jan 29, 2025

Uh oh!

davidecavestro commented Jan 31, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Javi3rV commented Jan 31, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

kbabanov commented Jan 31, 2025

Uh oh!

davidecavestro commented Jan 31, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

davidecavestro commented Jan 31, 2025 •

edited

Loading

Javi3rV commented Jan 31, 2025 •

edited

Loading

davidecavestro commented Jan 31, 2025 •

edited

Loading