Last active
May 8, 2017 22:54
-
-
Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Export all your Tablo videos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Requirements:
#  - Python >= 3.6 (f-strings are used throughout)
#  - requests
#  - aiohttp
#  - lxml (with cssselect)
#
import argparse | |
import asyncio | |
import json | |
import os | |
import shutil | |
import tempfile | |
import aiohttp | |
import requests | |
from lxml.html import fromstring | |
# Command-line configuration: output directory, Tablo device address,
# and how many recordings to download in parallel.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', dest='output', default='./videos')
parser.add_argument('--ip', dest='ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()
# The Tablo device serves its recordings over plain HTTP on port 18080.
ENDPOINT = 'http://{}:18080'.format(args.ip)
OUTPUT = args.output
# Global budget of in-flight segment requests; each of the CONCURRENCY
# parallel recording downloads gets an equal share of it.
MAX_SIMULTANEOUS_DOWNLOAD = 32
CONCURRENCY = args.concurrency
def get_videos():
    """Scrape the Tablo ``/pvr`` directory listing and return recording ids.

    Recordings appear as all-digit anchor texts in the listing table;
    anything else (e.g. the parent-directory link) is ignored.
    """
    listing = requests.get(f'{ENDPOINT}/pvr')
    tree = fromstring(listing.content)
    names = (anchor.text_content().strip()
             for anchor in tree.cssselect('tr td.n a'))
    return [name for name in names if name.isdigit()]
def get_meta(video_id):
    """Fetch and decode the JSON metadata blob for one recording."""
    meta_url = f'{ENDPOINT}/pvr/{video_id}/meta.txt'
    return requests.get(meta_url).json()
async def download_segment(session, output_filename, video_id, seg_name):
    """Fetch one MPEG-TS segment of a recording and return its raw bytes.

    :param session: shared ``aiohttp.ClientSession``
    :param output_filename: recording name, used only for progress output
    :param video_id: Tablo recording id
    :param seg_name: segment file name under ``/pvr/<id>/segs/``

    Using the response as an async context manager guarantees the
    connection is released back to the session's pool even if the
    body read fails (the original left the response unreleased).
    """
    print(f'download segment {seg_name} for {output_filename}')
    async with session.get(f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}') as resp:
        return await resp.read()
class Show:
    """Read-only view over a Tablo recording's metadata dict.

    Lookups prefer the Tribune guide data (``jsonFromTribune``) and fall
    back to the device's own ``jsonForClient`` records when a field (or
    the guide data itself) is missing.
    """

    def __init__(self, meta):
        # Raw metadata dict, exactly as returned by get_meta().
        self.meta = meta

    def _first(self, *lookups):
        """Return ``source()[key]`` for the first ``(source, key)`` that works.

        A KeyError raised by either the source accessor itself (e.g. a
        missing 'jsonFromTribune' section) or by the key lookup moves on
        to the next candidate; the final candidate's KeyError propagates.
        """
        last = len(lookups) - 1
        for idx, (source, key) in enumerate(lookups):
            try:
                return source()[key]
            except KeyError:
                if idx == last:
                    raise

    @property
    def program(self):
        # Guide data; absent for manual recordings, so callers catch KeyError.
        return self.meta['recEpisode']['jsonFromTribune']['program']

    @property
    def series(self):
        return self.meta['recSeries']['jsonForClient']

    @property
    def season(self):
        return self.meta['recSeason']['jsonForClient']

    @property
    def episode(self):
        return self.meta['recEpisode']['jsonForClient']

    @property
    def episode_title(self):
        return self._first((lambda: self.program, 'episodeTitle'),
                           (lambda: self.season, 'episodeTitle'),
                           (lambda: self.episode, 'title'))

    @property
    def title(self):
        return self._first((lambda: self.program, 'title'),
                           (lambda: self.series, 'title'))

    @property
    def episode_number(self):
        number = self._first((lambda: self.program, 'episodeNum'),
                             (lambda: self.episode, 'episodeNumber'))
        return str(number).zfill(2)

    @property
    def season_number(self):
        number = self._first((lambda: self.program, 'seasonNum'),
                             (lambda: self.season, 'seasonNumber'))
        return str(number).zfill(2)

    @property
    def identifier(self):
        # 's01e02'-style tag when season info is real, series id otherwise.
        if not self.valid_season:
            return self.program['seriesId']
        return f's{self.season_number}e{self.episode_number}'

    @property
    def valid_season(self):
        return self.season_number != '00' and self.episode_number != '00'

    @property
    def is_movie(self):
        # No usable season/episode and no guide data => treat as a movie.
        has_guide = 'jsonFromTribune' in self.meta['recEpisode']
        return not self.valid_season and not has_guide

    @property
    def year(self):
        return self.series['originalAirDate'].split('-')[0]
async def download(video_id):
    """Download one recording: fetch all MPEG-TS segments, stitch them
    into a temp ``.ts`` file, then remux to MP4 with ffmpeg.

    Skips the recording (after refreshing its saved metadata JSON) when
    the target MP4 already exists.  Segments are fetched in batches so
    each of the CONCURRENCY parallel downloads keeps its share of the
    global MAX_SIMULTANEOUS_DOWNLOAD connection budget.
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)
    tmp_dir = tempfile.mkdtemp()
    show = Show(meta)

    # Decide where the finished file belongs (Plex-style library layout).
    if show.is_movie:
        output_dir = os.path.join(OUTPUT, 'Movies')
        output_filename = f"{show.title} ({show.year})"
    else:
        output_filename = f"{show.title} - {show.identifier} - {show.episode_title}"
        if show.valid_season:
            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title,
                                      f"Season {show.season_number}")
        else:
            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title)

    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_filepath = f'{output_dir}/{output_filename}.mp4'

    # Always (re)write the raw metadata alongside the library, even when
    # the video itself is skipped below (matches the original behavior).
    meta_dir = os.path.join(OUTPUT, 'meta')
    if not os.path.exists(meta_dir):
        os.mkdir(meta_dir)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        print(f'Skipping {output_filepath}, already downloaded')
        # Fix: the original leaked the temp directory on this early return.
        shutil.rmtree(tmp_dir)
        return

    segments = dom.cssselect('tr td.n')
    batch = []
    count = 0
    # Fix: the context manager guarantees the HTTP session is closed even
    # if a segment download raises (the original only closed it on success).
    async with aiohttp.ClientSession() as session:
        for seg in segments:
            count += 1
            seg_name = seg.text_content()
            if seg_name in ('Parent Directory/',):
                continue
            batch.append(download_segment(session, output_filename,
                                          video_id, seg_name))
            if len(batch) >= (MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY):
                with open(ts_filepath, 'ab') as fi:
                    for file_chunk in await asyncio.gather(*batch):
                        fi.write(file_chunk)
                print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                batch = []
        # Flush whatever is left from the final partial batch.
        with open(ts_filepath, 'ab') as fi:
            for file_chunk in await asyncio.gather(*batch):
                fi.write(file_chunk)

    # Remux the concatenated transport stream into an MP4 container.
    # NOTE(review): '-c copy' overrides the preceding codec options, so
    # this is a pure remux (no transcode) — confirm that is intended.
    cmd = [
        'ffmpeg', '-y', '-i', ts_filepath,
        '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
        '-c', 'copy', f'{output_filepath}']
    print(f'Executing: {" ".join(cmd)}')
    process = await asyncio.create_subprocess_exec(
        *cmd, stdout=asyncio.subprocess.PIPE)
    await process.communicate()
    shutil.rmtree(tmp_dir)
async def download_all():
    """Download every recording on the device, CONCURRENCY at a time."""
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) < CONCURRENCY:
            continue
        await asyncio.gather(*pending)
        pending = []
    # Final, possibly partial (or empty), batch.
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Pre-3.7 way to drive an async entry point (asyncio.run() did not
    # exist yet); kept for compatibility with the old Pythons this
    # script targets.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(download_all())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment