Skip to content

Instantly share code, notes, and snippets.

@p3t3r67x0
Last active March 26, 2018 07:26
Show Gist options
  • Save p3t3r67x0/33586d8095a6ad62e3461e985d8071da to your computer and use it in GitHub Desktop.
Save p3t3r67x0/33586d8095a6ad62e3461e985d8071da to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import requests
from lxml import etree
from fake_useragent import UserAgent
# Shared fake_useragent.UserAgent instance; main() reads ``ua.chrome`` from it
# to obtain the User-Agent string that is sent with the HTTP request.
ua = UserAgent()
def request_content(url, ua_string, timeout=30):
    """Fetch *url* with a GET request and return the response body as text.

    Args:
        url: Address to request.
        ua_string: Value sent in the ``User-Agent`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The decoded response body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
    """
    headers = {'User-Agent': ua_string, 'Origin': 'http://www.3sat.de'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        # Callers must be prepared for a missing document.
        return None
    return response.text
def load_document(filename):
    """Return the complete contents of the file at *filename*.

    The file is opened in text mode (``'r'``) and closed again before the
    function returns.
    """
    with open(filename, 'r') as handle:
        contents = handle.read()
    return contents
def extract_video(node):
    """Return the fourth "veryhigh" quality video URL found via *node*.

    The XPath query selects, for every ``formitaet/quality`` element whose
    text is ``veryhigh``, the text of the first following ``url`` sibling.

    Args:
        node: Object exposing an ``xpath(expression)`` method that returns a
            sequence of matches.

    Returns:
        The match at index 3, or ``None`` when fewer than four matches exist.
    """
    # NOTE(review): the reason for picking the fourth match is not visible
    # here; presumably it is the preferred delivery variant -- confirm.
    url_query = '//formitaet/quality[text()="veryhigh"]/following-sibling::url[1]/text()'
    try:
        video_url = node.xpath(url_query)[3]
    except IndexError:
        video_url = None
    return video_url
def extract_title(node):
    """Return the text of the first ``information/title`` element.

    Args:
        node: Object exposing an ``xpath(expression)`` method that returns a
            sequence of matches.

    Returns:
        The first match, or ``None`` when the query yields nothing.
    """
    title_query = '//information/title/text()'
    try:
        first_title = node.xpath(title_query)[0]
    except IndexError:
        first_title = None
    return first_title
def extract_channel(node):
    """Return the text of the first ``originChannelTitle`` element.

    Args:
        node: Object exposing an ``xpath(expression)`` method that returns a
            sequence of matches.

    Returns:
        The first match, or ``None`` when the query yields nothing.
    """
    channel_query = '//originChannelTitle/text()'
    try:
        first_channel = node.xpath(channel_query)[0]
    except IndexError:
        first_channel = None
    return first_channel
def _build_filename(channel, title):
    """Return the ``<channel>_<title>.mp4`` file name printed for a video."""
    # Applied strictly in this order; the order matters because later steps
    # ('__' -> '_', '/' -> '_') operate on the output of the earlier ones.
    replacements = (
        (' ', '_'),
        ('.', ''),
        (':', ''),
        ('-', ''),
        ('?', ''),
        ('!', ''),
        (',', ''),
        ('__', '_'),
        ('"', ''),
        ('/', '_'),
    )
    for old, new in replacements:
        title = title.replace(old, new)
    return u'{}_{}.mp4'.format(channel.lower(), title.lower())


def main():
    """Print a suggested file name and the video URL for a 3SAT document.

    The URL is taken from the first command-line argument, downloaded,
    parsed as XML and searched for channel, title and video URL. On success
    two lines are printed: the generated file name and the video URL
    (followed by an empty line).

    Exits with status 1 when no URL was given, the download fails, the
    document cannot be parsed, or any of the three fields is missing.
    """
    if len(sys.argv) < 2:
        print('Enter the 3SAT url you want to download the mp4!')
        sys.exit(1)
    drei_sat_url = sys.argv[1]

    try:
        content = request_content(drei_sat_url, ua.chrome)
    except requests.exceptions.RequestException as error:
        sys.stderr.write('Request for %s failed: %s\n' % (drei_sat_url, error))
        sys.exit(1)

    # request_content() yields None for any non-200 answer; calling
    # .encode() on that would raise AttributeError.
    if content is None:
        sys.stderr.write('No document received from %s\n' % drei_sat_url)
        sys.exit(1)

    try:
        node = etree.fromstring(
            content.encode('utf-8'),
            parser=etree.XMLParser(encoding='utf-8'),
        )
    except (UnicodeEncodeError, etree.XMLSyntaxError):
        # XMLSyntaxError covers responses that are not well-formed XML
        # (for example an HTML error page).
        sys.stderr.write('Response from %s is not valid XML\n' % drei_sat_url)
        sys.exit(1)

    channel = extract_channel(node)
    title = extract_title(node)
    video = extract_video(node)

    if not (video and channel and title):
        sys.stderr.write('Could not find channel, title and video url in the document\n')
        sys.exit(1)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(_build_filename(channel, title))
    print(u'{}\n'.format(video))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment