Last active
March 21, 2016 14:38
-
-
Save blha303/cba598871c735f31403b to your computer and use it in GitHub Desktop.
A script that loads each of the given URLs, waits for a video tag to appear on the page, then downloads the video, using the page URL to build the filename (e.g. http://example.com/my-show/episode-1 produces my-show.episode-1.mp4).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Requires selenium (pip install selenium), plus Firefox and wget, which the script drives.
import contextlib
from os import environ
from subprocess import Popen
from sys import argv, stderr, stdin

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Seconds to wait for a <video> tag before reloading the page and trying again.
_WAIT_SECONDS = 10


def _read_urls(args):
    """Return the page URLs named by *args* (the command line minus argv[0]).

    A first argument of "-" means "read one URL per line from stdin"; the
    lines are stripped of surrounding whitespace and blank lines are skipped.
    Otherwise every argument is taken as a URL.
    """
    if args and args[0] == "-":
        stripped = (line.strip() for line in stdin)
        return [line for line in stripped if line]
    return list(args)


def _filename_for(page_url):
    """Build "<show>.<title>.mp4" from the last two "/"-separated parts of *page_url*.

    Anything from the first "?" in the final part onwards is dropped; a URL
    without a query string is used as-is.
    """
    show, title = page_url.split("/")[-2:]
    return "{}.{}.mp4".format(show, title.partition("?")[0])


def _download(driver, url):
    """Load *url*, wait for its <video> tag, then fetch the video with wget.

    The page is reloaded for as long as the wait keeps timing out. Returns
    True when wget exits with status 0, False when the video has no ``src``
    attribute or wget fails.
    """
    while True:
        try:
            driver.get(url)
            # until() returns the element found, so no second lookup is needed.
            video = WebDriverWait(driver, _WAIT_SECONDS).until(
                lambda d: d.find_element(By.TAG_NAME, "video")
            )
            break
        except TimeoutException:  # refreshes after the wait expires to try again
            continue

    source = video.get_attribute("src")
    filename = _filename_for(driver.current_url)
    # Navigate away from the page before the download starts.
    driver.get("about:blank")

    if not source:
        print("No video src found on {}".format(url), file=stderr)
        return False

    print("Getting {}".format(filename), file=stderr)
    status = Popen(["wget", "-O", filename, source]).wait()
    if status != 0:
        print("wget exited with status {} for {}".format(status, filename), file=stderr)
    return status == 0


def main(args=None):
    """Download the video found on every URL given on the command line.

    *args* is an argv-style list (program name first) and defaults to
    ``sys.argv``. Returns a process exit status: 0 when every download
    succeeded, 1 when at least one failed, 2 when no URL was supplied.
    """
    if args is None:
        args = argv
    urls = _read_urls(args[1:])
    if not urls:
        program = args[0] if args else "script"
        print("usage: {} (URL [URL ...] | -)".format(program), file=stderr)
        return 2

    failed = 0
    with contextlib.closing(webdriver.Firefox()) as driver:
        for url in urls:
            if not _download(driver, url):
                failed += 1
    return 1 if failed else 0


if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment