Daily Show downloader
#!/usr/bin/python
#
# Notes
# -----
# This script depends on rtmpdump and mythnettv; specify their paths in the
# global variables below. If you don't need mythnettv support, leave it
# undefined (MYTHNETTV = "") and the script will skip that step.
# rtmpdump: svn update -r275
#
import logging
import re
import threading
from os import stat, popen
from pipes import quote
from sys import exit
from urllib import urlopen
from optparse import OptionParser
from lxml import etree

GEN_URL = "http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?uri=mgid:cms:episode:thedailyshow.com:%d"
MYTHNETTV = "mythnettv"
RTMPDUMP = "rtmpdump"
RTMPDUMP_OPTS = "-W http://media.mtvnservices.com/player/release/?v=4.4.6 -q"
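# GEN_URL is Comedy Central's "mediaGen" feed: given a numeric episode/act id,
# it returns XML whose <src> elements carry the stream URLs. The -W flag hands
# rtmpdump the player SWF for SWF verification, which the mtvnservices streams
# appear to require; -q keeps rtmpdump quiet.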
def get_media_id(page):
    matches = re.findall(r'mgid:cms:[^:]+:comedycentral\.com:[0-9]+', page)
    if not matches:
        raise ValueError('no mgid found in page')
    logging.debug('get_media_id:: matches[0] = %s' % matches[0])
    media_id = int(matches[0].split(':')[-1])
    logging.info('get_media_id:: media_id = %d' % media_id)
    return media_id
def get_metadata(page):
    html = etree.HTML(page)
    head = html.find('head')
    body = html.find('body')
    title = body.xpath("//div[@class='showName']")[0].text.strip()
    subtitle = body.xpath("//h1[@class='subTitle']")[0].text.strip()
    # Read the content attribute by name; attribute order isn't guaranteed.
    description = head.xpath("//meta[@name='description']")[0].get('content')
    logging.debug('get_metadata:: %s: %s: %s' % (title, subtitle, description))
    return (title, subtitle, description)
def get_episode_list():
    urls = { 'The Daily Show': "http://www.thedailyshow.com/full-episodes",
             'Colbert Report': "http://www.colbertnation.com/full-episodes/" }
    episodes = []
    for (title, url) in urls.items():
        page_data = urlopen(url).read()
        anchors = etree.HTML(page_data).xpath("//span[@class='date']/a")
        for item in anchors:
            logging.debug('get_episode_list:: item.text = %s, item.href = %s' % (item.text, item.attrib['href']))
            episodes.append((title, item.text, item.attrib['href']))
    return episodes
def get_media_files(id_num):
    output = urlopen(GEN_URL % id_num).read()
    urls = re.findall(r'<src>(.*?\.mp4)</src>', output)
    logging.debug('get_media_files:: urls = %s' % '\n'.join(urls))
    return urls
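# Each mediaGen feed appears to list the same act at several bitrates, lowest
# first; the caller below takes the last <src>, assuming it is the highest
# quality available.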
def merge_files(id_num, filenames):
    cmd = "mencoder -really-quiet -oac mp3lame -ovc lavc -lavcopts vcodec=mpeg4:vbitrate=1800 -o output_%d.avi " % id_num
    cmd += " ".join(filenames)
    try:
        stat("output_%d.avi" % id_num)  # skip the merge if the output already exists
    except OSError:
        logging.debug('merge_files:: cmd = %s' % cmd)
        popen(cmd).read()  # read() blocks until mencoder finishes
    return "output_%d.avi" % id_num
########################################
class Downloader(threading.Thread):
    def __init__(self, url):
        self.rtmp = url
        self.output = ""
        threading.Thread.__init__(self)
    def download_rtmp_url(self):
        filename = self.rtmp.split("/")[-1].replace("mp4", "flv")
        cmd = RTMPDUMP + " " + RTMPDUMP_OPTS + " -o " + filename + " -r " + self.rtmp
        logging.debug('download_rtmp_url:: cmd = %s' % cmd)
        try:
            stat(filename)  # skip the download if the file already exists
        except OSError:
            popen(cmd).read()  # read() blocks until rtmpdump finishes
        return filename
    def run(self):
        # Skip filler streams; URLs containing 'sixty' or 'sting' appear to be
        # promos/bumpers rather than show acts.
        if 'sixty' in self.rtmp or 'sting' in self.rtmp:
            self.output = ""
            return
        logging.debug('run:: downloading %s' % self.rtmp)
        self.output = self.download_rtmp_url()
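# One Downloader thread per act lets rtmpdump fetch all the streams in
# parallel; run() records the downloaded filename in self.output so the main
# thread can collect results after join().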
if __name__ == '__main__':
    usage = '''
%prog http://www.thedailyshow.com/full-episodes/date-and-guest
%prog http://www.colbertnation.com/full-episodes/date-and-title-of-the-episode
%prog http://media.mtvnservices.com/mgid:cms:item:comedycentral.com:{ID}
%prog http://media.mtvnservices.com/mgid:cms:video:comedycentral.com:{ID}
%prog http://media.mtvnservices.com/mgid:cms:fullepisode:comedycentral.com:{ID}
'''
    parser = OptionParser(usage)
    parser.add_option("-l", "--list", action="store_true", dest="list", default=False, help="show a list of this week's episodes to select from")
    parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="enable debug logging")
    (options, args) = parser.parse_args()
    if len(args) == 0 and not options.list:
        parser.print_help()
        exit(1)
    loglevel = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=loglevel)
url = "" | |
if options.list: | |
episodes = get_episode_list() | |
index = 0 | |
for episode in episodes: | |
print "%d: %s - %s" % (index, episode[0], episode[1]) | |
index = index + 1 | |
selected = raw_input('Enter an episode number: ') | |
url = episodes[int(selected)][2] | |
else: | |
url = args[0] | |
    page_data = urlopen(url).read()
    id_num = get_media_id(page_data)
    (title, subtitle, description) = get_metadata(page_data)
    threads = []
    # The episode's acts appear to be numbered id_num+1 .. id_num+4.
    for x in range(1, 5):
        url = get_media_files(id_num + x)[-1]
        d = Downloader(url)
        threads.append(d)
        d.start()
    # Alternative: download every <src> the feed lists for the episode id itself.
    # urls = get_media_files(id_num)
    # threads = []
    # for url in urls:
    #     d = Downloader(url)
    #     threads.append(d)
    #     d.start()
    results_new = []
    for thread in threads:
        thread.join()
        output = thread.output
        logging.debug('downloader output: %s' % thread.output)
        if output:  # skipped promo/bumper threads leave output empty
            results_new.append(output)
    logging.debug('\n'.join(results_new))
    logging.debug('Merging files for id %d' % id_num)
    result = merge_files(id_num, results_new)
    if MYTHNETTV != "":
        # pipes.quote shell-escapes the metadata; re.escape would leave stray backslashes.
        cmd = MYTHNETTV + ' importlocal %s %s %s %s' % (result, quote(title), quote(subtitle), quote(description))
        logging.debug('Importing into MythTV; command = %s' % cmd)
        popen(cmd).read()
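# Example invocations (assuming the script is saved as dailyshow.py; the
# 2012-era Comedy Central pages may no longer resolve):
#   ./dailyshow.py --list                # pick from this week's episodes interactively
#   ./dailyshow.py -d http://www.thedailyshow.com/full-episodes/date-and-guest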