Skip to content

Instantly share code, notes, and snippets.

@vangheem
Last active May 8, 2017 22:54
Show Gist options
  • Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Export all your Tablo videos
#
# Requirements:
# - Python >= 3.6 (the script uses f-strings)
# - requests
# - aiohttp
# - lxml (plus the cssselect package, needed by lxml's cssselect())
#
import argparse
import asyncio
import json
import os
import shutil
import tempfile
import aiohttp
import requests
from lxml.html import fromstring
# Command-line options: output directory, device IP address and how many
# recordings are processed at the same time.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', default='./videos')
parser.add_argument('--ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()

# Module-level settings derived from the parsed options.
OUTPUT = args.output
CONCURRENCY = args.concurrency
# Divided by CONCURRENCY in download() to size each batch of segment requests.
MAX_SIMULTANEOUS_DOWNLOAD = 32
ENDPOINT = f'http://{args.ip}:18080'
def get_videos():
    """Return the recording ids listed on the device's ``/pvr`` index page.

    The page at ``{ENDPOINT}/pvr`` is parsed as HTML and every anchor matching
    ``tr td.n a`` is inspected; only anchors whose stripped text consists
    solely of digits are kept.

    Returns:
        list of str: the digit-only anchor texts, in page order.
    """
    listing = fromstring(requests.get(f'{ENDPOINT}/pvr').content)
    labels = (link.text_content().strip()
              for link in listing.cssselect('tr td.n a'))
    return [label for label in labels if label.isdigit()]
def get_meta(video_id):
    """Fetch ``meta.txt`` for recording *video_id* and return it JSON-decoded."""
    meta_url = f'{ENDPOINT}/pvr/{video_id}/meta.txt'
    return requests.get(meta_url).json()
async def download_segment(session, output_filename, video_id, seg_name):
    """Download one segment of a recording and return its body as bytes.

    Args:
        session: HTTP client session whose ``get()`` result can be used as an
            async context manager (an ``aiohttp.ClientSession`` in this file).
        output_filename: Name of the video being assembled; used only in the
            progress message.
        video_id: Id of the recording the segment belongs to.
        seg_name: File name of the segment under ``/pvr/<video_id>/segs/``.

    Returns:
        bytes: the complete response body.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status, so
        that an error page is never appended to the video as segment data.
    """
    print(f'download segment {seg_name} for {output_filename}')
    url = f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}'
    # ``async with`` releases the connection even if reading the body fails.
    async with session.get(url) as resp:
        resp.raise_for_status()
        return await resp.read()
class Show:
    """Read-only view over the decoded ``meta.txt`` dict of one recording.

    Most properties prefer the ``jsonFromTribune`` program data and fall back
    to the ``jsonForClient`` data when a key is missing.
    """

    def __init__(self, meta):
        self.meta = meta

    @staticmethod
    def _first_found(*getters):
        """Return the result of the first getter that does not raise KeyError.

        Every getter except the last is tried in order and a ``KeyError`` moves
        on to the next one; the last getter is called unguarded, so its
        ``KeyError`` (if any) propagates to the caller.
        """
        *preferred, last_resort = getters
        for getter in preferred:
            try:
                return getter()
            except KeyError:
                pass
        return last_resort()

    @property
    def program(self):
        """The ``program`` dict nested under ``recEpisode.jsonFromTribune``."""
        tribune = self.meta['recEpisode']['jsonFromTribune']
        return tribune['program']

    @property
    def series(self):
        """The ``jsonForClient`` dict of ``recSeries``."""
        return self.meta['recSeries']['jsonForClient']

    @property
    def season(self):
        """The ``jsonForClient`` dict of ``recSeason``."""
        return self.meta['recSeason']['jsonForClient']

    @property
    def episode(self):
        """The ``jsonForClient`` dict of ``recEpisode``."""
        return self.meta['recEpisode']['jsonForClient']

    @property
    def episode_title(self):
        """Episode title: program, then season, then episode client data."""
        return self._first_found(
            lambda: self.program['episodeTitle'],
            lambda: self.season['episodeTitle'],
            lambda: self.episode['title'],
        )

    @property
    def title(self):
        """Show title: program data first, series client data as fallback."""
        return self._first_found(
            lambda: self.program['title'],
            lambda: self.series['title'],
        )

    @property
    def episode_number(self):
        """Episode number as a string zero-padded to at least two characters."""
        number = self._first_found(
            lambda: self.program['episodeNum'],
            lambda: self.episode['episodeNumber'],
        )
        return str(number).zfill(2)

    @property
    def season_number(self):
        """Season number as a string zero-padded to at least two characters."""
        number = self._first_found(
            lambda: self.program['seasonNum'],
            lambda: self.season['seasonNumber'],
        )
        return str(number).zfill(2)

    @property
    def identifier(self):
        """``sNNeMM`` when season and episode are valid, else the series id."""
        if not self.valid_season:
            return self.program['seriesId']
        return f's{self.season_number}e{self.episode_number}'

    @property
    def valid_season(self):
        """True unless the season number or the episode number is ``'00'``."""
        return not (self.season_number == '00' or self.episode_number == '00')

    @property
    def is_movie(self):
        """True when there is no valid season and no ``jsonFromTribune`` data."""
        if self.valid_season:
            return False
        return 'jsonFromTribune' not in self.meta['recEpisode']

    @property
    def year(self):
        """Text before the first ``-`` of the series' ``originalAirDate``."""
        year_part, _, _ = self.series['originalAirDate'].partition('-')
        return year_part
def _plan_output(show):
    """Return ``(output_dir, output_filename)`` for *show* below OUTPUT."""
    if show.is_movie:
        return os.path.join(OUTPUT, 'Movies'), f"{show.title} ({show.year})"
    output_filename = f"{show.title} - {show.identifier} - {show.episode_title}"
    if show.valid_season:
        output_dir = os.path.join(OUTPUT, 'TV Shows', show.title,
                                  f"Season {show.season_number}")
    else:
        output_dir = os.path.join(OUTPUT, 'TV Shows', show.title)
    return output_dir, output_filename


async def _append_segments(batch, ts_filepath):
    """Await every download in *batch* and append the bodies to *ts_filepath*."""
    with open(ts_filepath, 'ab') as ts_file:
        # gather() returns results in argument order, so segments stay ordered.
        for file_chunk in await asyncio.gather(*batch):
            ts_file.write(file_chunk)


async def download(video_id):
    """Download recording *video_id* and convert it to an ``.mp4`` file.

    Steps:
      1. Fetch the metadata and derive the target directory and file name
         (``Movies/`` or ``TV Shows/<title>[/Season NN]/`` below OUTPUT).
      2. Write the raw metadata to ``<OUTPUT>/meta/<name>.json``.
      3. Return early if the target ``.mp4`` already exists.
      4. Download all segments in batches, appending them in listing order to
         a temporary ``.ts`` file.
      5. Run ``ffmpeg`` to produce the ``.mp4``.

    The temporary directory and the HTTP session are released on every exit
    path, including errors.

    Args:
        video_id: Id of the recording, as returned by ``get_videos()``.

    Returns:
        None.
    """
    # NOTE: get_meta() and requests.get() are synchronous calls, so the event
    # loop is blocked while they run.
    meta = get_meta(video_id)
    show = Show(meta)

    output_dir, output_filename = _plan_output(show)
    os.makedirs(output_dir, exist_ok=True)
    output_filepath = f'{output_dir}/{output_filename}.mp4'

    meta_dir = os.path.join(OUTPUT, 'meta')
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as meta_file:
        meta_file.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        print(f'Skipping {output_filepath}, already downloaded')
        return

    # The segment listing is only needed once we know we must download.
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)
    # Drop the directory-listing's parent link up front so that the progress
    # counter below only counts real segments.
    seg_names = [
        name
        for name in (cell.text_content() for cell in dom.cssselect('tr td.n'))
        if name != 'Parent Directory/'
    ]
    # Hoisted out of the loop: how many segment requests run at once.
    batch_size = MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY

    with tempfile.TemporaryDirectory() as tmp_dir:
        ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')

        async with aiohttp.ClientSession() as session:
            batch = []
            for count, seg_name in enumerate(seg_names, start=1):
                batch.append(download_segment(
                    session, output_filename, video_id, seg_name))
                if len(batch) >= batch_size:
                    await _append_segments(batch, ts_filepath)
                    print(f'Downloaded ({count}/{len(seg_names)}) of {output_filename}')
                    batch = []
            # Flush whatever is left; this also creates the .ts file when the
            # listing was empty, as before.
            await _append_segments(batch, ts_filepath)

        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath,
            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
            '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
        if process.returncode != 0:
            # Report instead of failing silently; the output may be incomplete.
            print(f'ffmpeg exited with code {process.returncode} '
                  f'for {output_filepath}')
async def download_all():
    """Download every recording reported by ``get_videos()``.

    Recordings are started in groups: once CONCURRENCY downloads have been
    queued they are awaited together before the next group begins, and any
    remainder is awaited at the end.
    """
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) < CONCURRENCY:
            continue
        await asyncio.gather(*pending)
        pending = []
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Script entry point: run the whole export to completion.
    if hasattr(asyncio, 'run'):
        # Python 3.7+: creates a fresh event loop and closes it afterwards.
        # Calling get_event_loop() with no running loop is deprecated.
        asyncio.run(download_all())
    else:
        # Older interpreters: drive the loop by hand and always close it.
        event_loop = asyncio.get_event_loop()
        try:
            event_loop.run_until_complete(download_all())
        finally:
            event_loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment