Skip to content

Instantly share code, notes, and snippets.

@alexeygrigorev
Created September 17, 2016 09:09
Show Gist options
  • Save alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd to your computer and use it in GitHub Desktop.
Save alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd to your computer and use it in GitHub Desktop.
Downloading segmented video from vimeo
import requests
import base64
from tqdm import tqdm
# Download the tallest rendition listed in a Vimeo master.json and write it to
# 'video_<id>.mp4' in the current directory. Runs on Python 2 and Python 3.
master_json_url = 'https://178skyfiregce-a.akamaihd.net/exp=1474107106~acl=%2F142089577%2F%2A~hmac=0d9becc441fc5385462d53bf59cf019c0184690862f49b414e9a2f1c5bafbe0d/142089577/video/426274424,426274425,426274423,426274422/master.json?base64_init=1'
# The search for the last '/' ignores the final 26 characters, which for this
# URL are '/master.json?base64_init=1', so the slice keeps everything up to and
# including '.../video/'.
base_url = master_json_url[:master_json_url.rfind('/', 0, -26) + 1]
resp = requests.get(master_json_url)
content = resp.json()
# Pair every rendition index with its height and keep the index of the tallest.
heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
idx, _ = max(heights, key=lambda pair: pair[1])
video = content['video'][idx]
video_base_url = base_url + video['base_url']
print('base url: %s' % video_base_url)
filename = 'video_%d.mp4' % video['id']
print('saving to %s' % filename)
# 'with' closes the output file even when a request raises part-way through.
with open(filename, 'wb') as video_file:
    # The init segment is embedded in the JSON as base64 and goes first.
    init_segment = base64.b64decode(video['init_segment'])
    video_file.write(init_segment)
    for segment in tqdm(video['segments']):
        segment_url = video_base_url + segment['url']
        resp = requests.get(segment_url, stream=True)
        if resp.status_code != 200:
            # Stop at the first failed segment; what was written so far stays on disk.
            print('not 200!')
            print(resp)
            print(segment_url)
            break
        for chunk in resp:
            video_file.write(chunk)
@kbabanov
Copy link

Here is a new version that supports ffmpeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependency installation, and multithreading (thanks to @Javi3rV). I have not tested every conditional flow, but it should work. If you find a bug, I can fix it later.

import importlib.metadata
import subprocess
import sys

# Third-party packages the rest of the script imports; any that are not
# installed are fetched with pip before those imports run.
required = {'requests', 'tqdm', 'moviepy'}
installed = set()
for pkg in importlib.metadata.distributions():
    installed.add(pkg.metadata['Name'])
missing = required.difference(installed)

if len(missing) > 0:
    # Run pip through the current interpreter so packages land in its environment.
    pip_install = [sys.executable, '-m', 'pip', 'install']
    # Upgrade pip itself first, then install whatever is absent.
    subprocess.check_call(pip_install + ['--upgrade', 'pip'])
    subprocess.check_call(pip_install + list(missing))


import os
import base64
import requests
from shutil import which
from tqdm import tqdm
from random import choice
from string import ascii_lowercase
from concurrent.futures import ThreadPoolExecutor

# Record which external command-line tools are reachable through PATH.
has_ffmpeg = which('ffmpeg') is not None
has_youtube_dl = which('youtube-dl') is not None
has_yt_dlp = which('yt-dlp') is not None

# Becomes True only when the pre-2.0 'moviepy.editor' module imports cleanly.
moviepy_deprecated = False

if not has_ffmpeg:
    # moviepy is imported only when there is no ffmpeg executable on PATH.
    try:
        from moviepy.editor import *  # before 2.0, deprecated
    except ImportError:
        from moviepy import *  # after 2.0
    else:
        moviepy_deprecated = True

url = input('enter [master|playlist].json url: ')
name = input('enter output name: ')

if 'master.json' in url:
    # master.json links are rewritten to the matching master.mpd URL and handed
    # to youtube-dl / yt-dlp; the script exits in this branch either way.
    # partition() leaves a URL without a query string intact, whereas slicing
    # with find('?') == -1 would cut off its last character.
    url = url.partition('?')[0] + '?query_string_ranges=1'
    url = url.replace('master.json', 'master.mpd')
    print(url)

    # youtube-dl is tried first, then yt-dlp.
    for available, tool in ((has_youtube_dl, 'youtube-dl'), (has_yt_dlp, 'yt-dlp')):
        if available:
            # Pass the downloader's exit status on instead of always reporting success.
            sys.exit(subprocess.run([tool, url, '-o', name]).returncode)

    print('you should have youtube-dl or yt-dlp in your PATH to download master.json like links')
    sys.exit(1)


def download_segment(segment_url, segment_path):
    """Fetch one media segment and store it at ``segment_path``.

    A response whose status is not 200 is reported on stdout and no file is
    written.

    Args:
        segment_url: URL of the segment to request.
        segment_path: Path of the file that receives the response body.
    """
    # The context manager releases the streamed connection on every path,
    # including the early return below where the body is never read.
    with requests.get(segment_url, stream=True) as resp:
        if resp.status_code != 200:
            print('not 200!')
            print(segment_url)
            return
        with open(segment_path, 'wb') as segment_file:
            # Iterating the response object directly yields 128-byte pieces;
            # request larger chunks to cut down on tiny writes.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                segment_file.write(chunk)


def download(what, to, base):
    """Download every segment of one stream and concatenate them into a file.

    Segments are fetched in parallel into temporary files in the current
    directory, then appended in order after the stream's init segment. The
    temporary files are removed whether or not the download succeeds.

    Args:
        what: Stream description from the playlist JSON. The keys read here
            are ``mime_type``, ``init_segment`` (base64 text) and ``segments``
            (a list of dicts that each carry a ``url``).
        to: Path of the output file; it is overwritten.
        base: URL prefix prepended to every segment's ``url``.

    Raises:
        FileNotFoundError: If at least one segment was not downloaded. The
            output file is not created in that case.
    """
    print('saving', what['mime_type'], 'to', to)
    init_segment = base64.b64decode(what['init_segment'])

    # random suffix so several downloads can share the same folder
    segment_suffix = ''.join(choice(ascii_lowercase) for _ in range(20)) + '_'

    segment_urls = [base + segment['url'] for segment in what['segments']]
    segment_paths = [f"segment_{i}_" + segment_suffix + ".tmp" for i, _ in enumerate(segment_urls)]

    try:
        with ThreadPoolExecutor(max_workers=15) as executor:
            list(tqdm(executor.map(download_segment, segment_urls, segment_paths), total=len(segment_urls)))

        # download_segment() writes nothing when the server does not answer 200,
        # so a missing temp file is how a failed segment shows up here.
        failed_urls = [
            segment_url
            for segment_url, segment_path in zip(segment_urls, segment_paths)
            if not os.path.exists(segment_path)
        ]
        if failed_urls:
            raise FileNotFoundError(
                '%d of %d segments could not be downloaded, first one: %s'
                % (len(failed_urls), len(segment_urls), failed_urls[0])
            )

        with open(to, 'wb') as file:
            file.write(init_segment)
            for segment_path in segment_paths:
                with open(segment_path, 'rb') as segment_file:
                    file.write(segment_file.read())
    finally:
        # Clean up on success and on failure so aborted runs leave no temp files.
        for segment_path in segment_paths:
            if os.path.exists(segment_path):
                os.remove(segment_path)

    print('done')


name += '.mp4'
# NOTE(review): the search for the last '/' skips the final 26 characters of the
# URL, so which path segment it selects depends on the length of the file name
# and query string - confirm against a current playlist.json link.
base_url = url[:url.rfind('/', 0, -26) + 1]
response = requests.get(url)
if response.status_code >= 400:
    print('error: cant get url content, test your link in browser, code=', response.status_code, '\ncontent:\n', response.content)
    sys.exit(1)

content = response.json()

# Pick the video rendition with the greatest height.
vid_idx, _ = max(enumerate(content['video']), key=lambda pair: pair[1]['height'])

# A missing 'audio' key is treated like an empty list: the video has no audio.
audio_streams = content.get('audio') or []
audio_present = bool(audio_streams)

# Pick the audio rendition with the highest bitrate, when there is one.
audio_idx = None
if audio_present:
    audio_idx, _ = max(enumerate(audio_streams), key=lambda pair: pair[1]['bitrate'])

base_url = base_url + content['base_url']

# random prefix so several downloads can share the same folder
files_prefix = ''.join(choice(ascii_lowercase) for _ in range(20)) + '_'

video_tmp_file = files_prefix + 'video.mp4'
video = content['video'][vid_idx]
download(video, video_tmp_file, base_url + video['base_url'])

if not audio_present:
    # Nothing to mux: the downloaded video stream is the final file.
    os.rename(video_tmp_file, name)
    sys.exit(0)

audio_tmp_file = files_prefix + 'audio.mp4'
audio = audio_streams[audio_idx]
download(audio, audio_tmp_file, base_url + audio['base_url'])

if has_ffmpeg:
    # Stream-copy both tracks into one container, no re-encoding.
    mux = subprocess.run(['ffmpeg', '-i', video_tmp_file, '-i', audio_tmp_file, '-c:v', 'copy', '-c:a', 'copy', name])
    if mux.returncode != 0:
        # Keep the downloaded streams so they can be merged by hand.
        print('error: ffmpeg failed with code', mux.returncode, '- keeping', video_tmp_file, 'and', audio_tmp_file)
        sys.exit(mux.returncode)
    os.remove(video_tmp_file)
    os.remove(audio_tmp_file)
    sys.exit(0)

# No ffmpeg executable on PATH: merge the two streams with moviepy instead.
video_clip = VideoFileClip(video_tmp_file)
audio_clip = AudioFileClip(audio_tmp_file)
try:
    if moviepy_deprecated:
        final_clip = video_clip.set_audio(audio_clip)  # moviepy < 2.0 API
    else:
        final_clip = video_clip.with_audio(audio_clip)  # moviepy >= 2.0 API
    final_clip.write_videofile(name)
finally:
    # Close the clips before the files they read from are deleted.
    audio_clip.close()
    video_clip.close()

os.remove(video_tmp_file)
os.remove(audio_tmp_file)

@kbabanov
Copy link

kbabanov commented Jan 28, 2025 via email

@ArnyminerZ
Copy link

@kbabanov This worked like a charm. Thank you so much

@davidecavestro
Copy link

davidecavestro commented Jan 31, 2025

Here is new version, which support ffpmeg, youtube-dl, yt-dlp, moviepy 2.0, videos without audio, automatic dependencies installation and multithreading (thx to @Javi3rV ). I have not tested all conditional flows, but it should work. If you find bug or something, i can fix it later.

I just updated the container image at https://github.com/davidecavestro/vimeo-dl
Is there any video URL that does not expire, so that I can add an automated test to the build?
EDIT: I could also use it to trigger deps installation at build time
EDIT2: I leveraged automatic deps installation passing a fake url

@Javi3rV
Copy link

Javi3rV commented Jan 31, 2025

I didn't try this, but maybe downloading the json file and using it to test?
We know the json url changes but I'm not sure about the json contents

Edit: as an idea, in my personal script I added the option to supply more than one URL as a list of dataclasses (url, outputName). It then iterates over the list and downloads them one by one.
I didn't share it because it was just a personal preference, but it can easily be done in @kbabanov's script as well. I also thought about using multithreading to download all of them like there is no tomorrow, but Vimeo would look at the network traffic and suspect something lol.

@kbabanov
Copy link

@Javi3rV :

I also thought about using multithreading and download all of them like there is no tomorrow but vimeo would take a look at net traffic and would suspect something

If so, you can tweak the number of workers in this line:

with ThreadPoolExecutor(max_workers=15) as executor:

and, if you want, set it to 1 to disable multithreading entirely.
I have also thought about supporting multiple URLs, and I may come up with a solution a bit later.

@davidecavestro
Copy link

davidecavestro commented Jan 31, 2025

I find it useful to have a way to avoid asking for user input, so that the whole thing can be easily scripted.
It's often just a matter of supporting env vars such as

# Read the settings from the environment first so the script can run
# unattended; prompt only when a variable is unset or empty.
url = os.getenv("SRC_URL") or input('enter [master|playlist].json url: ')
name = os.getenv("OUT_FILE") or input('enter output name: ')
# Keep the worker count within 1..15: ThreadPoolExecutor raises ValueError for
# max_workers below 1.
max_workers = max(1, min(int(os.getenv("MAX_WORKERS", 5)), 15))

or/and if you prefer the launch args could be parsed.

Anyway IMHO multithreading is a different matter: as too many simultaneous requests from the same IP are a PITA, I consider a simple loop safer.

I'll check for using json contents for tests.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment