Created
July 18, 2019 21:34
-
-
Save mon0theist/a042a0e3d479df69595152a4af5f0bc0 to your computer and use it in GitHub Desktop.
ATBS Chapter 11 xkcd Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# ATBS Chapter 11
# XKCD Downloader
#
# Starts at the xkcd front page, saves the comic image found on each page
# into ./xkcd, then follows that page's "Prev" link until the link target
# ends with '#'.

# import modules
import os

import bs4
import requests

url = 'http://xkcd.com'  # starting URL
os.makedirs('xkcd', exist_ok=True)  # store comics in ./xkcd
# exist_ok=True keyword argument prevents the function from throwing an
# exception if this folder already exists.

while not url.endswith('#'):  # '#' indicates either first or last comic
    # Download the page
    print('Downloading page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers are installed (and bs4 does not warn about guessing).
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image
    comicElem = soup.select('#comic img')
    if not comicElem:
        # select() returns a list, so an empty list means there was no
        # comic image to be found on that page.
        print('Could not find comic image.')
    else:
        try:
            comicURL = 'http:' + comicElem[0].get('src')
            # Download the image
            print('Downloading image %s...' % (comicURL))
            res = requests.get(comicURL)
            res.raise_for_status()
        except (requests.exceptions.MissingSchema,
                requests.exceptions.InvalidURL):
            # The src attribute did not form a URL requests can fetch;
            # skip this comic and move on to the previous one.
            print('Skipping image %s (not a usable URL).' % (comicURL))
        else:
            # Save the image to ./xkcd, named after the last path component
            # of the image URL. The with-block closes the file even if a
            # write fails part-way through.
            imagePath = os.path.join('xkcd', os.path.basename(comicURL))
            with open(imagePath, 'wb') as imageFile:
                for chunk in res.iter_content(100000):
                    imageFile.write(chunk)

    # Get the Prev button's URL
    prevLink = soup.select('a[rel="prev"]')[0]
    url = 'http://xkcd.com' + prevLink.get('href')

print('Done.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment