impshum/xkcd.py

sdegutis · 2018-05-31T04:16:54Z

Super cool! You inspired me to give it a try too! Here's my take in Node.js, same #LOC:

const fs = require('fs'), https = require('https');

/**
 * Fetch `url` over HTTPS and parse the response body as JSON.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<object>} Resolves with the parsed body, or with `{}` when
 *   the server answers 404. Rejects on network errors and on bodies that are
 *   not valid JSON.
 */
const getJSON = url =>
  new Promise((resolve, reject) =>
    https.get(url, (response) => {
      if (response.statusCode === 404) {
        // Settle the promise (the caller awaits it) and drain the socket.
        response.resume();
        resolve({});
        return;
      }
      let body = '';
      // Decode at the stream level so multi-byte characters that straddle a
      // chunk boundary are not corrupted by per-chunk string conversion.
      response.setEncoding('utf8');
      response
        .on('data', (chunk) => { body += chunk; })
        .on('end', () => {
          try {
            resolve(JSON.parse(body));
          } catch (err) {
            reject(err);
          }
        })
        .on('error', reject);
    }).on('error', reject));

/**
 * Download comic number `i` into the `comics/` directory, then continue with
 * comic `i + 1`.
 *
 * The `comics/` directory must already exist. The chain stops at the first
 * number that yields no image, except for number 404, which is skipped.
 *
 * @param {number} i - Comic number to fetch.
 * @returns {Promise<void>} Settles once the metadata chain has run to its end;
 *   image downloads are started but not awaited.
 */
async function save(i) {
  const { img, safe_title } = await getJSON(`https://xkcd.com/${i}/info.0.json`);
  if (img) {
    console.log(`${i}: ${safe_title}`);
    // Replace every run of whitespace or path-hostile characters, not just
    // the first space, so the title is usable as a file name.
    const title = String(safe_title).replace(/[\s\\/:*?"<>|]+/g, '_');
    // Keep the image's own extension so the saved file opens correctly.
    const match = /\.\w+$/.exec(img);
    const extension = match ? match[0] : '';
    const filename = `comics/${i}-${title}${extension}`;
    const reportError = (err) => console.error(`${i}: download failed: ${err.message}`);
    https
      .get(img, (res) => res.pipe(fs.createWriteStream(filename)).on('error', reportError))
      .on('error', reportError);
  }
  // Return the next step so failures propagate to whoever awaits save().
  if (img || i === 404) return save(i + 1);
}

// Start at comic #1; save() itself moves on to the following comic.
save(1);

impshum · 2018-05-31T15:17:29Z

@sdegutis Did you do a full run?

sdegutis · 2018-05-31T15:43:14Z

@impshum I did not :/

impshum · 2018-06-02T20:07:11Z

Test it out man. I had to fix the 404 error/comic thing and the file types. I'll find a smaller way to do this when I do. For now it works from start to finish... BAM!

4lpha0ne · 2018-06-05T11:59:23Z

8 LOC:

# Download every xkcd comic into an existing 'comics' directory.
import requests
lr = n = 1  # lr: status code of the previous request, n: current comic number
while True:
    r = requests.get('https://xkcd.com/{}/info.0.json'.format(n))
    # Only download when the JSON exists and 'img' ends in a 3-letter extension.
    if r.status_code != 404 and r.json()['img'][-4]=='.':
        with open('comics/{}-{}.{}'.format(r.json()['num'], ''.join(['_' if c in '\\/`*{}[]()<>#+!?:' else c for c in r.json()['safe_title']]), r.json()['img'][-3:]), "wb") as f: f.write(requests.get(r.json()['img']).content)
    # Stop only on two consecutive 404s; a lone 404 (comic number 404) is skipped.
    elif lr==r.status_code==404: break
    lr=r.status_code ; n += 1

Long version, with makedir:

# Download every xkcd comic into ./comics, creating the directory if needed.
import os
import requests

# Make sure the output directory exists before the first file is written.
if not os.path.exists('comics'):
    os.makedirs('comics')

last_status = 0  # status code of the previous request (0 = no request yet)
n = 1            # first comic number; starting at 0 only costs a wasted 404
while True:
    r = requests.get('https://xkcd.com/{}/info.0.json'.format(n))  # fetch metadata
    if r.status_code == 404:
        # A lone 404 is tolerated (comic number 404 returns one); two in a row
        # means we have run past the newest comic.
        if last_status == 404:
            break
    else:
        d = r.json()  # parse once and reuse
        # Slicing instead of indexing avoids an IndexError on a short 'img'.
        # A dot four characters from the end indicates a 3-letter extension.
        if d['img'][-4:-3] == '.':
            print('{}: {}'.format(d['num'], d['safe_title']))
            # Replace characters that are unsafe in file names.
            title = ''.join('_' if c in '\\/`*{}[]()<>#+!?:' else c
                            for c in d['safe_title'])
            path = 'comics/{}-{}.{}'.format(d['num'], title, d['img'][-3:])
            with open(path, "wb") as f:
                f.write(requests.get(d['img']).content)
    last_status = r.status_code  # remember for the consecutive-404 check
    n += 1

	# Download xkcd comics into a directory called comics.
	# Create a directory called comics next to the script first
	import requests

	# NOTE(review): starts at 933, presumably a resume point - set to 1 for a full run.
	n = 933

	while True:
	    # Comic number 404 is skipped instead of being requested.
	    if n == 404:
	        n += 1
	    url = 'https://xkcd.com/{}/info.0.json'.format(n)
	    r = requests.get(url)
	    # Any other 404 is treated as the end of the archive.
	    if r.status_code == 404:
	        break
	    d = r.json()
	    n = d['num']
	    t = d['safe_title']
	    u = d['img']
	    print('{}: {}'.format(n, t))
	    # Pick the extension from the image URL; None when it is not a known
	    # type, so a stale or unbound 'ext' is never used.
	    ext = next((e for e in ('png', 'jpg', 'gif') if u.endswith('.' + e)), None)
	    if ext is not None:
	        x = 'comics/{}-{}.{}'.format(n, t.replace(' ', '_').replace('/', '-'), ext)
	        # Download first so a failed request does not leave an empty file.
	        c = requests.get(u)
	        with open(x, "wb") as f:
	            f.write(c.content)
	    n += 1

impshum/xkcd.py

sdegutis commented May 31, 2018

impshum commented May 31, 2018

sdegutis commented May 31, 2018

impshum commented Jun 2, 2018 •

edited

Loading

4lpha0ne commented Jun 5, 2018 •

edited

Loading

impshum/xkcd.py

sdegutis commented May 31, 2018

impshum commented May 31, 2018

sdegutis commented May 31, 2018

impshum commented Jun 2, 2018 • edited Loading

4lpha0ne commented Jun 5, 2018 • edited Loading

impshum commented Jun 2, 2018 •

edited

Loading

4lpha0ne commented Jun 5, 2018 •

edited

Loading