Created
July 7, 2019 20:32
-
-
Save VieVie31/22be1ba7629c5d9f5d5e2f0b7d87188f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to download a video from the ARTE recordings of the Philharmonie de Paris.

Example:
    python3 download.py "https://www.arte.tv/fr/videos/087078-007-A/kazuki-yamada-dirige-berlioz-et-jarrell/"
    python3 download.py "https://www.arte.tv/fr/videos/087078-007-A/kazuki-yamada-dirige-berlioz-et-jarrell/" my_output_name.mp4
""" | |
import os | |
import sys | |
import json | |
import argparse | |
import requests | |
from time import time | |
# Browser-like User-Agent; the script passes this mapping as ``headers=``
# when it queries the video-info API.
HEADERS = {
    'User-Agent': (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    ),
}
def download(url, filename):
    """Stream the resource at ``url`` into ``filename`` with a progress bar.

    Args:
        url: Address of the file to fetch.
        filename: Path of the local file to (over)write in binary mode.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            The output file is not created in that case.

    When the server sends no ``content-length`` header the body is written
    in one go and no progress bar is drawn. A newline is always written to
    stdout once the transfer is finished.
    """
    # source : https://sumit-ghosh.com/articles/python-download-progress-bar/
    bar_width = 50
    # The response is a context manager so the connection is released even
    # if writing to disk fails halfway through.
    with requests.get(url, stream=True) as response:
        # Fail before touching the disk: otherwise an HTML error page would
        # be saved under the video's file name.
        response.raise_for_status()
        total = response.headers.get('content-length')
        with open(filename, 'wb') as f:
            if total is None:
                f.write(response.content)
            else:
                downloaded = 0
                total = int(total)
                # At least 1 MiB per chunk, larger for big files so the bar
                # is redrawn at most ~1000 times.
                chunk_size = max(total // 1000, 1024 * 1024)
                for data in response.iter_content(chunk_size=chunk_size):
                    downloaded += len(data)
                    f.write(data)
                    # ``downloaded`` can exceed ``total`` when the body is
                    # transparently decompressed, and ``total`` may be 0:
                    # clamp so the bar never overflows nor divides by zero.
                    if total > 0:
                        done = min(bar_width, bar_width * downloaded // total)
                    else:
                        done = bar_width
                    sys.stdout.write('\r[{}{}]'.format('█' * done, '.' * (bar_width - done)))
                    sys.stdout.flush()
    sys.stdout.write('\n')
def _pick_best_mp4_url(video_candidates):
    """Return the url of the highest-bitrate mp4 entry of the API ``VSR`` mapping.

    Raises:
        Exception: If none of the entries has ``mediaType == 'mp4'``.
    """
    mp4_streams = [
        stream for stream in video_candidates.values()
        if stream['mediaType'] == 'mp4'
    ]
    if not mp4_streams:
        raise Exception("No mp4 url found... :'(")
    # ``max`` keeps the first entry on ties, like sorting by descending bitrate.
    return max(mp4_streams, key=lambda stream: stream['bitrate'])['url']


def _default_output_name(player_infos):
    """Derive an output base name (without extension) from the API answer.

    Tries the ``VTI`` field, then ``VID``, keeping only file-name friendly
    characters. Falls back to the current timestamp when neither field is
    present or when nothing usable is left after filtering.
    """
    valid_chars = frozenset(
        '-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    )
    for field in ('VTI', 'VID'):
        raw = player_infos.get(field)
        if raw is None:
            continue
        cleaned = ''.join(c for c in str(raw) if c in valid_chars)
        # A name made only of blanks (or nothing at all) would produce a
        # file called ".mp4"; try the next field instead.
        if cleaned.strip():
            return cleaned
    return f"{time()}"


if __name__ == "__main__":
    # Tuple comparison: the former ``major < 3 and minor < 7`` test was only
    # true when BOTH parts were too small, and did not stop the script.
    if sys.version_info < (3, 7):
        sys.exit("This script requires python 3.7 at least !")

    # Stop here instead of failing on ``sys.argv[1]`` below.
    if not sys.argv[1:]:
        sys.exit("Usage : python3 video_page_url [out_path].")

    url = sys.argv[1]
    out_name = sys.argv[2] if len(sys.argv) > 2 else ""

    # Get the API info url code from the video page url
    _, marker, tail = url.partition("/videos/")
    video_id = tail.split('/')[0]
    if not marker or not video_id:
        sys.exit(f"Could not find a video id in url : {url}")
    video_infos_url = f"https://api.arte.tv/api/player/v1/config/fr/{video_id}?autostart=1&lifeCycle=1&lang=fr_FR&autostart=1&mute=0"

    # Download videos info via the API (the session is closed on exit)
    with requests.Session() as session:
        r = session.get(video_infos_url, headers=HEADERS)
        if not r.ok:
            raise Exception(f"Failed to find infos... :'( error code : {r.status_code}")
        # Parse API result
        api_infos = r.json()

    player_infos = api_infos['videoJsonPlayer']

    # Find the highest bitrate mp4 video url
    video_url = _pick_best_mp4_url(player_infos['VSR'])

    # Attribute an output file name. A name given on the command line is
    # used verbatim (the usage example already carries its extension);
    # only generated names get the ".mp4" suffix.
    if not out_name:
        out_name = _default_output_name(player_infos) + '.mp4'

    # Download and save content
    download(video_url, out_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment