Skip to content

Instantly share code, notes, and snippets.

@djm
Created July 19, 2014 13:03
Show Gist options
  • Select an option

  • Save djm/e434627fad2765ea2131 to your computer and use it in GitHub Desktop.

Select an option

Save djm/e434627fad2765ea2131 to your computer and use it in GitHub Desktop.
Download all from imgur embed URL
import os
import sys
from os.path import basename, splitext
from urlparse import urlparse
import requests
from bs4 import BeautifulSoup
def extract_full_size_url_from_tag(image_tag):
    """Build the full-size image URL from a thumbnail ``<img>`` tag.

    Reads the tag's ``data-src`` attribute, drops the final character of
    the file name (presumably the thumbnail size marker -- confirm against
    the embed page markup) and rebuilds the URL on the same host using
    plain ``http``.

    :param image_tag: mapping-like tag object exposing a ``data-src`` key.
    :returns: the rebuilt URL as a string.
    """
    thumb = urlparse(image_tag['data-src'])
    stem, extension = splitext(basename(thumb.path))
    # Only host, trimmed name and extension survive; query string and
    # directory components of the thumbnail URL are discarded.
    return 'http://{}/{}{}'.format(thumb.hostname, stem[:-1], extension)
def extract_file_name_from_url(image_url):
    """Return the last path component of ``image_url``.

    Only the URL's path is considered, so any query string or fragment is
    left out of the result.
    """
    return basename(urlparse(image_url).path)
def download_image(image_url, path):
    """Download ``image_url`` and write the response body to ``path``.

    Progress is reported on stdout. The file is only written when the
    server answers with HTTP 200; any other status is reported and the
    function returns without creating the file.

    :param image_url: URL of the image to fetch.
    :param path: destination file path; opened in binary write mode.
    """
    print('DOWNLOADING: %s @ %s' % (image_url, path))
    response = requests.get(image_url, stream=True)
    try:
        if response.status_code != 200:
            # Previously a failed request was skipped without any output.
            print('ERROR: got HTTP %s for %s'
                  % (response.status_code, image_url))
            return
        with open(path, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; ask for a sensible
            # block size so the body is not written one byte at a time.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()
    print('SAVED: %s' % path)
def download_all_from_embed(embed_url, download_path):
    """Download every image referenced by an embed page.

    Fetches ``embed_url``, collects all ``<img>`` tags carrying the
    ``thumb-title-embed`` class, derives each image's full-size URL and
    saves it under ``download_path``. Files that already exist there are
    skipped. If the embed page request fails, an error is printed and the
    process exits with status 1.

    :param embed_url: URL of the embed page to scan.
    :param download_path: directory the images are written into.
    """
    embed_response = requests.get(embed_url)
    if not embed_response.ok:
        # The format string used to end in a bare '%', which raised
        # ValueError instead of printing the message.
        print("ERROR in response on %s" % embed_url)
        sys.exit(1)

    soup = BeautifulSoup(embed_response.content)
    images = soup.find_all('img', class_="thumb-title-embed")
    for image_tag in images:
        image_url = extract_full_size_url_from_tag(image_tag)
        file_name = extract_file_name_from_url(image_url)
        path = os.path.join(download_path, file_name)
        if not os.path.exists(path):
            download_image(image_url, path)
        else:
            print('SKIPPING: %s' % image_url)
if __name__ == "__main__":
    # Expect exactly two arguments: the embed page URL and the target
    # directory. Report a usage line instead of failing on the unpacking
    # below with an opaque ValueError.
    if len(sys.argv) != 3:
        print('Usage: %s <url-of-embed-page> <download-directory>'
              % sys.argv[0])
        sys.exit(2)
    embed, path = sys.argv[1:]
    if not os.path.exists(path):
        # makedirs also creates missing parent directories, which mkdir
        # would refuse to do.
        os.makedirs(path)
    download_all_from_embed(embed, path)
requests==2.3.0
beautifulsoup4==4.3.2
@djm
Copy link
Author

djm commented Jul 19, 2014

Usage:

python dl.py <url-of-embed-page> <download-directory>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment