Skip to content

Instantly share code, notes, and snippets.

@blha303
Last active March 21, 2016 14:38
Show Gist options
  • Save blha303/cba598871c735f31403b to your computer and use it in GitHub Desktop.
Save blha303/cba598871c735f31403b to your computer and use it in GitHub Desktop.
A script that loads each of the given URLs, waits for a video tag to appear on the page, then downloads the video, using the page URL to build the filename (e.g., http://example.com/my-show/episode-1 will produce my-show.episode-1.mp4).
#!/usr/bin/env python3
# Requires: pip install selenium. Firefox must also be installed (the script drives it via webdriver.Firefox), and wget must be on PATH.
import contextlib
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from subprocess import Popen
from sys import argv, stderr, stdin
from os import environ
def parse_urls(args, stream=stdin):
    """Return the page URLs selected by the command-line arguments.

    args: the arguments after the program name. A first argument of "-"
        means "read one URL per line from *stream*"; otherwise every
        argument is itself a URL.
    stream: iterable of lines used for the "-" form (defaults to stdin).

    Lines read from *stream* are stripped of surrounding whitespace
    (including the trailing newline) and blank lines are dropped, so the
    browser is never asked to load "http://...\\n" or an empty string.
    An empty *args* yields an empty list.
    """
    if not args:
        return []
    if args[0] == "-":
        stripped = (line.strip() for line in stream)
        return [url for url in stripped if url]
    return list(args)


def filename_from_url(page_url):
    """Build the output filename from the last two path segments of a URL.

    "http://example.com/my-show/episode-1?x=1" -> "my-show.episode-1.mp4".
    The query string is optional; it is removed *before* splitting on "/"
    so a "/" inside the query cannot be mistaken for a path separator.
    """
    path = page_url.partition("?")[0]
    show, title = path.split("/")[-2:]
    return "{}.{}.mp4".format(show, title)


def download_video(driver, page_url):
    """Load *page_url*, wait for a <video> element, and fetch its src with wget.

    The page is reloaded every time the ten-second wait for the <video>
    element times out, so this keeps trying until the element shows up.
    """
    while True:
        try:
            driver.get(page_url)
            # NOTE(review): find_element_by_tag_name is the Selenium 3 API;
            # newer Selenium releases appear to have dropped it - confirm
            # against the installed version.
            element = WebDriverWait(driver, 10).until(
                lambda d: d.find_element_by_tag_name("video"))
        except TimeoutException:  # refreshes after ten seconds to try again
            continue
        break

    video = element.get_attribute("src")
    filename = filename_from_url(driver.current_url)
    # Navigate away before downloading so the page stops using the stream.
    driver.get("about:blank")
    if not video:
        # Without a src there is nothing to hand to wget; skip this page
        # instead of crashing inside Popen.
        print("No video src found for {}".format(page_url), file=stderr)
        return
    print("Getting {}".format(filename), file=stderr)
    wget = Popen(["wget", "-O", filename, video])
    if wget.wait() != 0:
        print("wget failed for {} (exit status {})".format(
            filename, wget.returncode), file=stderr)


def main():
    """Download the video from every URL given on the command line or stdin.

    Returns the process exit status: 2 when no URLs were supplied,
    0 otherwise.
    """
    urls = parse_urls(argv[1:])
    if not urls:
        print("usage: {} URL [URL ...]   (or '-' to read URLs from stdin)"
              .format(argv[0]), file=stderr)
        return 2
    with contextlib.closing(webdriver.Firefox()) as driver:
        for url in urls:
            download_video(driver, url)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment