Leonidas-from-XIV · October 30, 2010 12:56
diff --git a/gistfile1.py b/gistfile1.py
 #!/usr/bin/env python3
 # A downloader for Zero Punctuation episodes. Grabs them from the internet
 # and saves them with the appropriate naming into the folder. The name is
 # determined automatically from the web site.
 # Licensed under GPLv3, fwiw.
 import sys, urllib.request, re, json

 # The browser we pretend to be when talking to the servers
 # (yay for increasing Firefox and Linux market share).
 user_agent = """Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Firefox/3.6.12"""
 # Matches the config-file URL embedded in the episode page; group 1 is the
 # URL. Pretty crude, admittedly. The dots are escaped so that they only match
 # literal dots instead of any character.
 config_re = re.compile(r'value="config=(http://www\.themis-media\.com/videos/config/\d*-\w*\.js)')
 # Extracts the charset name from a Content-Type header value; group 1 stops at
 # the end of the name, so trailing parameters or quotes are not captured.
 charset_re = re.compile(r'charset="?([\w.:+-]+)', re.IGNORECASE)

 def construct_request(url, host, referrer=None):
    """Build a urllib request that looks like it comes from a real browser.

    Setting only the User-Agent is not enough, so the remaining headers copy
    what Firefox 3.6.12 actually sends. `host` is sent as the Host header and
    a Referer header is added only when `referrer` is given.
    """
    browser_headers = {
        'User-Agent': user_agent,
        'Host': host,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.7,de;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Connection': 'close',
    }
    # maybe they also want a referrer? sending one does no harm
    if referrer is not None:
        browser_headers['Referer'] = referrer

    return urllib.request.Request(url, headers=browser_headers)

 def get_config_path(source):
    """Find the URL of the config file on the episode page.

    `source` is the decoded text of the page. Returns the URL captured by
    group 1 of `config_re`. Raises ValueError when the page contains no
    config URL, instead of failing with an AttributeError on None.
    """
    match = config_re.search(source)
    if match is None:
        raise ValueError("could not find the config file URL on the page")
    return match.group(1)

 def download_hook(blocks_done, blocksize, total):
    """Display the download progress on a single, continuously rewritten line.

    Has the signature of a urllib.request.urlretrieve report hook: the number
    of blocks transferred so far, the block size in bytes and the total size
    in bytes.
    """
    done = blocks_done * blocksize
    if total > 0:
        # the last block is usually only partly filled, so the product can
        # overshoot the real size; never report more than the total
        done = min(done, total)
        progress = "\r {0}/{1}".format(done, total)
    else:
        # no usable total was reported, so show only the bytes fetched so far
        progress = "\r {0}/?".format(done)
    print(progress, end='')
    # no newline is printed, so flush explicitly or a line-buffered terminal
    # shows nothing until the download has finished
    sys.stdout.flush()

 def main():
    """Download the episode whose page URL is given as the first argument.

    Loads the episode page, extracts the player config URL from it, reads the
    video URL and the episode name from that config and saves the video as
    "<name>.mp4" relative to the current directory.
    """
    if len(sys.argv) < 2:
        # fail with a usage hint rather than an IndexError traceback
        sys.exit("usage: {0} EPISODE_URL".format(sys.argv[0]))
    url = sys.argv[1]

    # load the "intro" page and extract the config file
    with urllib.request.urlopen(construct_request(url, 'www.escapistmagazine.com')) as page:
        bytestream = page.read()
        # let the header parser pick the charset; this copes with a missing
        # charset parameter and with extra Content-Type parameters
        charset = page.headers.get_content_charset() or 'utf-8'
    # only the config URL is needed from the page, so undecodable bytes
    # elsewhere must not abort the download
    content = bytestream.decode(charset, 'replace')
    config_path = get_config_path(content)

    # grab the config file. this is not that easy, because the page blacklists
    # "incorrect" requests, so we get BadStatusLine exception
    # => take care that we pretend to be a browser good enough.
    with urllib.request.urlopen(construct_request(config_path, 'www.themis-media.com', referrer=url)) as page:
        # the JS file can be parsed as JSON when we change the quotes
        content = page.read().decode('utf-8').replace("'", '"')
    config = json.loads(content)

    # URL to the video
    video = config['playlist'][1]['url']
    # the name of the episode
    name = config['plugins']['viral']['share']['description']
    # destination filename; the name comes from the web, so keep path
    # separators out of it
    filename = name.replace('/', '-').replace('\\', '-') + '.mp4'

    # grab it
    print("Downloading to {0}".format(filename))
    urllib.request.urlretrieve(video, filename, download_hook)
    print()

 # run the downloader only when executed as a script, not when imported
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	# A downloader for Zero Punctuation episodes. Grabs them from the internet
	# and saves them with the appropriate naming into the folder. The name is
	# determined automatically from the web site.
	# Licensed under GPLv3, fwiw.
	import sys, urllib.request, re, json

	# The browser we pretend to be when talking to the servers
	# (yay for increasing Firefox and Linux market share).
	user_agent = """Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Firefox/3.6.12"""
	# Matches the config-file URL embedded in the episode page; group 1 is the
	# URL. Pretty crude, admittedly. The `*` quantifiers let the numeric and
	# word parts of the file name have any length, and the dots are escaped so
	# that they only match literal dots.
	config_re = re.compile(r'value="config=(http://www\.themis-media\.com/videos/config/\d*-\w*\.js)')
	# Extracts the charset name from a Content-Type header value; group 1 stops at
	# the end of the name, so trailing parameters or quotes are not captured.
	charset_re = re.compile(r'charset="?([\w.:+-]+)', re.IGNORECASE)

	def construct_request(url, host, referrer=None):
	    """Build a urllib request that looks like it comes from a real browser.

	    Setting only the User-Agent is not enough, so the remaining headers copy
	    what Firefox 3.6.12 actually sends. `host` is sent as the Host header and
	    a Referer header is added only when `referrer` is given.
	    """
	    browser_headers = {
	        'User-Agent': user_agent,
	        'Host': host,
	        # the final entry is the */* wildcard that accepts any media type
	        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	        'Accept-Language': 'en-us,en;q=0.7,de;q=0.3',
	        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	        'Connection': 'close',
	    }
	    # maybe they also want a referrer? sending one does no harm
	    if referrer is not None:
	        browser_headers['Referer'] = referrer

	    return urllib.request.Request(url, headers=browser_headers)

	def get_config_path(source):
	    """Find the URL of the config file on the episode page.

	    `source` is the decoded text of the page. Returns the URL captured by
	    group 1 of `config_re`. Raises ValueError when the page contains no
	    config URL, instead of failing with an AttributeError on None.
	    """
	    match = config_re.search(source)
	    if match is None:
	        raise ValueError("could not find the config file URL on the page")
	    return match.group(1)

	def download_hook(blocks_done, blocksize, total):
	    """Display the download progress on a single, continuously rewritten line.

	    Has the signature of a urllib.request.urlretrieve report hook: the number
	    of blocks transferred so far, the block size in bytes and the total size
	    in bytes.
	    """
	    done = blocks_done * blocksize
	    if total > 0:
	        # the last block is usually only partly filled, so the product can
	        # overshoot the real size; never report more than the total
	        done = min(done, total)
	        progress = "\r {0}/{1}".format(done, total)
	    else:
	        # no usable total was reported, so show only the bytes fetched so far
	        progress = "\r {0}/?".format(done)
	    print(progress, end='')
	    # no newline is printed, so flush explicitly or a line-buffered terminal
	    # shows nothing until the download has finished
	    sys.stdout.flush()

	def main():
	    """Download the episode whose page URL is given as the first argument.

	    Loads the episode page, extracts the player config URL from it, reads the
	    video URL and the episode name from that config and saves the video as
	    "<name>.mp4" relative to the current directory.
	    """
	    if len(sys.argv) < 2:
	        # fail with a usage hint rather than an IndexError traceback
	        sys.exit("usage: {0} EPISODE_URL".format(sys.argv[0]))
	    url = sys.argv[1]

	    # load the "intro" page and extract the config file
	    with urllib.request.urlopen(construct_request(url, 'www.escapistmagazine.com')) as page:
	        bytestream = page.read()
	        # let the header parser pick the charset; this copes with a missing
	        # charset parameter and with extra Content-Type parameters
	        charset = page.headers.get_content_charset() or 'utf-8'
	    # only the config URL is needed from the page, so undecodable bytes
	    # elsewhere must not abort the download
	    content = bytestream.decode(charset, 'replace')
	    config_path = get_config_path(content)

	    # grab the config file. this is not that easy, because the page blacklists
	    # "incorrect" requests, so we get BadStatusLine exception
	    # => take care that we pretend to be a browser good enough.
	    with urllib.request.urlopen(construct_request(config_path, 'www.themis-media.com', referrer=url)) as page:
	        # the JS file can be parsed as JSON when we change the quotes
	        content = page.read().decode('utf-8').replace("'", '"')
	    config = json.loads(content)

	    # URL to the video
	    video = config['playlist'][1]['url']
	    # the name of the episode
	    name = config['plugins']['viral']['share']['description']
	    # destination filename; the name comes from the web, so keep path
	    # separators out of it
	    filename = name.replace('/', '-').replace('\\', '-') + '.mp4'

	    # grab it
	    print("Downloading to {0}".format(filename))
	    urllib.request.urlretrieve(video, filename, download_hook)
	    print()

	# run the downloader only when executed as a script, not when imported
	if __name__ == '__main__':
	    main()
No results found