Skip to content

Instantly share code, notes, and snippets.

@lanekatris
Last active November 23, 2017 15:32
Show Gist options
  • Select an option

  • Save lanekatris/cb699c5cc017e93038c912fdb7f2ccf6 to your computer and use it in GitHub Desktop.

Select an option

Save lanekatris/cb699c5cc017e93038c912fdb7f2ccf6 to your computer and use it in GitHub Desktop.
ES6 JavaScript to download certificates from proprofs.com via web scraping. Supply a base integer to the start() method (typically the ID of a certificate you already know of); the script then downloads that certificate and keeps moving to the next ID, either incrementing or decrementing the value, which you can change in the _modifyId() function. It stops after 100 downloads; this limit can be altered.
const request = require('request');
const cheerio = require('cheerio');
const http = require('https');
const fs = require('fs');
/**
 * Scrapes certificate pages on proprofs.com and saves each certificate image
 * into the current working directory as `<id><imageExtension>`.
 *
 * Starting from the id passed to `start()`, the downloader walks ids in the
 * direction defined by `_modifyId()` until `limit` images have been saved or
 * the certificate page can no longer be requested.
 */
class CertificateDownloader {
  constructor() {
    // Maximum number of images saved in one run.
    this.limit = 100;
    // Images successfully written to disk so far.
    this.downloadCount = 0;
    // Ids skipped because their image file already existed.
    this.skippedImageCount = 0;
    this.imageExtension = '.jpg';
    // Id currently being processed; -1 until start() is called.
    this.currentId = -1;
    this.rootUrl = 'https://www.proprofs.com/';
  }

  /** Moves `currentId` to the next id to try (decrements by one). */
  _modifyId() {
    this.currentId--;
  }

  /** @returns {string} File name the image for `currentId` is stored under. */
  _fileName() {
    return `${this.currentId}${this.imageExtension}`;
  }

  /** @returns {boolean} Whether the image for `currentId` is already on disk. */
  _doesFileExist() {
    return fs.existsSync(this._fileName());
  }

  /** Counts `currentId` as skipped and continues with the next id. */
  _skipDownload() {
    console.log(`${this._fileName()} already exists, skipping...`);
    this._modifyId();
    this.skippedImageCount++;
    this.start(this.currentId);
  }

  /** Advances to the next id and processes it. */
  _tryNext() {
    this._modifyId();
    this.start(this.currentId);
  }

  /**
   * Extracts the certificate image URL from a certificate page.
   *
   * @param {string} body - HTML of the certificate page.
   * @returns {string|null} Absolute image URL, or null when the page has no
   *   `img#certImage` element with a usable `src`.
   */
  _findImageUrl(body) {
    const $ = cheerio.load(body);
    const element = $('body').find('img#certImage')[0];
    const src = element && element.attribs && element.attribs.src;
    if (!src) {
      return null;
    }
    try {
      // Resolving against rootUrl copes with relative, root-relative and
      // absolute src values without producing doubled slashes.
      return new URL(src, this.rootUrl).href;
    } catch (ignored) {
      return null;
    }
  }

  /**
   * Streams the image at `src` into `fileName`.
   *
   * `done` is called exactly once: with null after the file has been fully
   * written, or with an Error otherwise. A partially written file is removed
   * so that a later run does not mistake it for a finished download.
   *
   * @param {string} src - Absolute image URL.
   * @param {string} fileName - Destination file name.
   * @param {function(?Error): void} done - Completion callback.
   */
  _downloadImage(src, fileName, done) {
    let settled = false;
    let file = null;

    const succeed = () => {
      if (settled) {
        return;
      }
      settled = true;
      done(null);
    };

    const fail = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      if (file === null) {
        done(err);
        return;
      }
      file.destroy();
      fs.unlink(fileName, () => done(err));
    };

    let imageRequest;
    try {
      imageRequest = http.get(src, (imageResponse) => {
        if (imageResponse.statusCode !== 200) {
          // Drain the body so the underlying socket is released.
          imageResponse.resume();
          fail(new Error(`image request returned status ${imageResponse.statusCode}`));
          return;
        }
        // Only create the file once there is real image data to write.
        file = fs.createWriteStream(fileName);
        file.on('error', fail);
        file.on('finish', succeed);
        imageResponse.on('error', fail);
        imageResponse.pipe(file);
      });
    } catch (err) {
      // http.get throws synchronously, e.g. for an unsupported protocol.
      fail(err);
      return;
    }
    imageRequest.on('error', fail);
  }

  /**
   * Processes `idToStartWith` and then continues with the following ids.
   *
   * @param {number} idToStartWith - Integer certificate id to begin with.
   * @throws {Error} When `idToStartWith` is not an integer.
   */
  start(idToStartWith) {
    // Number.isInteger also rejects NaN, which `typeof` reports as a number.
    if (!Number.isInteger(idToStartWith)) {
      throw new Error('idToStartWith is required, and to be a number');
    }
    this.currentId = idToStartWith;

    if (this._doesFileExist()) {
      this._skipDownload();
      return;
    }
    if (this.downloadCount >= this.limit) {
      console.log('Limit met, quitting.');
      return;
    }

    console.log(`Getting html for ${this.currentId}, download count ${this.downloadCount}, limit: ${this.limit}, skipped images: ${this.skippedImageCount}...`);
    const pageUrl = new URL(`quiz-school/usercertificate.php?id=${this.currentId}`, this.rootUrl).href;

    request(pageUrl, (err, response, body) => {
      if (err) {
        // Without a response there is nothing to inspect; stop instead of
        // hammering further ids while the connection is broken.
        console.log(`Request for ${this.currentId} failed: ${err.message}. Quitting.`);
        return;
      }
      if (response.statusCode === 404 || response.statusCode === 500) {
        console.log(`Got a status code of: ${response.statusCode}. Trying next...`);
        this._tryNext();
        return;
      }

      const src = this._findImageUrl(body);
      if (src === null) {
        console.log(`No certificate image found for ${this.currentId}. Trying next...`);
        this._tryNext();
        return;
      }

      console.log('Getting image...');
      this._downloadImage(src, this._fileName(), (downloadErr) => {
        if (downloadErr) {
          console.log(`Could not download image for ${this.currentId}: ${downloadErr.message}. Trying next...`);
        } else {
          console.log('Done!');
          this.downloadCount++;
        }
        this._tryNext();
      });
    });
  }
}
// Script entry point: begin at a known certificate id; the downloader then
// moves on to further ids by itself.
const FIRST_CERTIFICATE_ID = 152895196;
const downloader = new CertificateDownloader();
downloader.start(FIRST_CERTIFICATE_ID);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment