Skip to content

Instantly share code, notes, and snippets.

@laurent-dinh
Last active March 13, 2017 16:04
Show Gist options
  • Save laurent-dinh/e1a3cdac6ea0ebf4bf2a9fe7d3b36644 to your computer and use it in GitHub Desktop.
Save laurent-dinh/e1a3cdac6ea0ebf4bf2a9fe7d3b36644 to your computer and use it in GitHub Desktop.
Did it for the Vine
import requests
import time
import random
from bs4 import BeautifulSoup
import argparse
import urllib
import string
# Download every Vine linked from a bookmarks/HTML file.
#
# You use the script as follows:
#   $ python2.7 dl_vines.py --bookmarks html_file_with_vines.html
# where html_file_with_vines.html is the path to an HTML file containing
# links to the relevant vines.
#
# For each link the script fetches the Vine page, reads its <title>, looks
# for a <meta> tag whose "content" attribute contains "mp4?" and saves that
# URL as "<last URL segment> (<sanitised title>).mp4" in the current
# directory.
#
# NOTE: print is always given a single parenthesised argument, which behaves
# exactly like the plain Python 2 print statement.
parser = argparse.ArgumentParser()
parser.add_argument("--bookmarks", type=str, default="")
args = parser.parse_args()
if not args.bookmarks:
    # Without this check the script dies with an opaque IOError from open("").
    parser.error("--bookmarks is required: path to an HTML file with vine links")

with open(args.bookmarks, "r") as fin:
    content = "\n".join(line.strip() for line in fin)

soup = BeautifulSoup(content, "html.parser")
# href=True skips anchors that have no href attribute; for those
# elem.get("href") is None and `"vine.co/v/" in None` raises TypeError.
vines = [
    elem["href"]
    for elem in soup.find_all("a", href=True)
    if "vine.co/v/" in elem["href"]
]

print("Downloading:")
for vine in vines:
    print(vine)
print("Confirm ? [y/n]")
answer = raw_input()

if answer.lower() == "y":
    url_opener = urllib.FancyURLopener()
    # Only these characters are kept in the title part of the file name.
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
    print("Starting download...")
    for vine in vines:
        # Random pause of up to 5 seconds before each request.
        time.sleep(random.random() * 5)
        try:
            print("Downloading %s" % vine)
            vine = vine.strip()
            # A timeout keeps one stalled connection from hanging the batch.
            page = requests.get(vine, timeout=30)
            # Report HTTP errors directly instead of failing later while
            # parsing an error page.
            page.raise_for_status()
            page_soup = BeautifulSoup(page.content, "html.parser")
            title = page_soup.find_all('title')[0].string.strip()
            print("Title: %s" % title)
            title = ''.join(c for c in title if c in valid_chars)
            print("renamed as %s" % title)
            mp4_urls = [
                meta.get("content")
                for meta in page_soup.find_all("meta")
                if meta.has_attr("content") and "mp4?" in meta.get("content")
            ]
            if not mp4_urls:
                # Clearer than the bare IndexError "list index out of range".
                raise ValueError("no mp4 video URL found in page metadata")
            vid_url = mp4_urls[0]
            url_opener.retrieve(
                vid_url,
                "%s (%s).mp4" % (vine.split("/")[-1], title))
        except Exception as e:
            # Best effort: report the failure and carry on with the next vine.
            print("Failed !")
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment