Last active
July 15, 2016 16:42
-
-
Save calebreister/948c7e1a227802a3ab2515d0488636ad to your computer and use it in GitHub Desktop.
Python script that downloads xkcd comics to basePath, prepends the comic number to the filename, and puts the alt text in 0-alt.txt. I recommend setting basePath to an absolute path.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Download xkcd comics by their index numbers (passed as arguments).

Each image is saved under ``basePath`` as ``<number>-<image file name>`` and
its alt text is recorded as a ``<number>: <alt text>`` line in
``basePath + '0-alt.txt'``. Comics that already have an entry in that file
are skipped.
"""
import sys
import re
import json
from urllib.request import *

basePath = '/home/caleb/Pictures/Comics/xkcd/'


def read_alt_text(path):
    """Return the stripped lines of the alt text file at *path*.

    A missing file yields an empty list so that the first run, before any
    alt text has been written, does not fail.
    """
    try:
        with open(path, encoding='utf-8') as alt_file:
            return [line.strip() for line in alt_file]
    except FileNotFoundError:
        return []


def recorded_numbers(lines):
    """Return the set of comic numbers that already have an entry.

    Only the text before the first ':' of each line is considered, so comic
    ``5`` is not mistaken for ``15`` and a ``5:`` inside some alt text is
    not mistaken for an entry.
    """
    return {line.partition(':')[0] for line in lines if ':' in line}


def image_filename(img_url):
    """Return the last path component of *img_url*.

    Any file name is accepted (hyphens, parentheses, any extension).

    Raises:
        ValueError: if *img_url* is empty or ends with '/'.
    """
    match = re.search(r'[^/]+$', img_url)
    if match is None:
        raise ValueError('no image file name in URL: %r' % (img_url,))
    return match.group()


def main(argv=None):
    """Download every comic named in *argv* and update the alt text file.

    Args:
        argv: comic numbers as strings; defaults to ``sys.argv[1:]``.
    """
    comic_nums = sys.argv[1:] if argv is None else argv
    alt_path = basePath + '0-alt.txt'

    # Read alt text file into memory
    alt_text = read_alt_text(alt_path)
    seen = recorded_numbers(alt_text)

    for comic_num in comic_nums:
        # Skip duplicates (already on disk or repeated on the command line)
        if comic_num in seen:
            continue

        # Download comic metadata (alt text and image URL)
        info_url = 'http://xkcd.com/' + comic_num + '/info.0.json'
        with urlopen(info_url) as response:
            data = json.loads(response.read().decode('utf-8'))

        # Download image
        target = basePath + comic_num + '-' + image_filename(data['img'])
        urlretrieve(data['img'], target)

        alt_text.append(comic_num + ': ' + data['alt'])
        seen.add(comic_num)

    # Write alt text file to disk, one entry per paragraph
    with open(alt_path, 'w', encoding='utf-8') as alt_file:
        for line in alt_text:
            if line:
                alt_file.write(line + '\n\n')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment