Skip to content

Instantly share code, notes, and snippets.

@suhithr
Last active October 30, 2016 19:04
Show Gist options
  • Save suhithr/9a06a422f68a155a6b16494d81f5a21f to your computer and use it in GitHub Desktop.
Save suhithr/9a06a422f68a155a6b16494d81f5a21f to your computer and use it in GitHub Desktop.
A very simple 9gag scraper
# Code used for the Delta Workshop on Python and Linux 2016 at NIT, Trichy
import urllib
from time import sleep
import requests # http://docs.python-requests.org/en/master/
from bs4 import BeautifulSoup # https://www.crummy.com/software/BeautifulSoup/bs4/doc/
# Try printing the variables at various stages to inspect the objects.
# Better yet, run this line by line in your Python interpreter and
# print each variable to see what is happening.
# Fetch the 9gag front page and parse the returned HTML.
url = "http://9gag.com"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

# This is a CSS selector: the format is tag.class#id
# We are not using soup.find_all('img') because it gets all the images on the
# page. Some of those are badges and icons, and we only want images that are
# part of the feed.
img_tags = soup.select('img.badge-item-img')

# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2;
# importing it this way lets the script run on either.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# We will only download the first ten images
n = 10
for img in img_tags:
    if n <= 0:
        break  # Limit reached: no need to walk the remaining tags

    # Tag.get() returns None for a missing attribute, whereas img['src']
    # would raise KeyError before any "is not None" check could run.
    src = img.get('src')
    if not src:
        continue

    # The alt text is used as the file name. It comes from the remote page,
    # so replace path separators to keep the file in the current directory.
    # Fall back to the last URL segment when there is no alt text.
    name = img.get('alt') or src.rsplit('/', 1)[-1]
    filename = name.replace('/', '_').replace('\\', '_')

    sleep(1)  # Adding a delay so we don't flood 9gag's servers
    urlretrieve(src, filename)
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Downloaded {}".format(filename))
    n -= 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment