Created
July 8, 2017 10:09
-
-
Save 0xMH/8036c6563ddbd77d4fae48a12ea6ce76 to your computer and use it in GitHub Desktop.
downloadXkcd.py - Downloads every single XKCD comic.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! python3
# downloadXkcd.py - Downloads every single XKCD comic.
# source: Automate the Boring Stuff with Python
#
# Starting at the xkcd front page, the script downloads the page, saves the
# image found under the '#comic' element into ./xkcd, then follows the page's
# rel="prev" link. It stops when the next URL ends with '#' (presumably the
# Prev link target on the very first comic) or when no Prev link is found.
# Any HTTP error status aborts the run via raise_for_status().
import os
from urllib.parse import urljoin

import bs4
import requests

url = 'http://xkcd.com'  # starting url
os.makedirs('xkcd', exist_ok=True)  # store comics in ./xkcd

while not url.endswith('#'):
    # Download the page.
    print('Downloading page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()

    # Parse as HTML with the stdlib parser. The 'xml' feature requires the
    # optional lxml package and applies XML rules to an HTML document.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    comicSrc = comicElem[0].get('src') if comicElem else None
    if not comicSrc:
        # No <img> under #comic, or the <img> has no src attribute.
        print('Could not find comic image.')
    else:
        # urljoin resolves protocol-relative ('//host/path'), absolute and
        # path-relative src values against the current page URL, instead of
        # assuming the value always starts with '//'.
        comicUrl = urljoin(url, comicSrc)

        # Download the image.
        print('Downloading image %s...' % (comicUrl))
        imageRes = requests.get(comicUrl)
        imageRes.raise_for_status()

        # Save the image to ./xkcd; 'with' closes the file even if a write
        # or a chunk download fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in imageRes.iter_content(100000):
                imageFile.write(chunk)

    # Get the Prev button's url.
    prevLinks = soup.select('a[rel="prev"]')
    prevHref = prevLinks[0].get('href') if prevLinks else None
    if not prevHref:
        # Without a Prev link there is nowhere to go next; stop cleanly
        # instead of failing with IndexError/TypeError.
        print('Could not find Prev link; stopping.')
        break
    # Plain concatenation (not urljoin) keeps a bare '#' href intact so the
    # loop condition above can detect it and terminate.
    url = 'http://xkcd.com' + prevHref

print('Done.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment