Last active
November 23, 2017 15:32
-
-
Save lanekatris/cb699c5cc017e93038c912fdb7f2ccf6 to your computer and use it in GitHub Desktop.
ES6 JavaScript that downloads certificates from proprofs.com via web scraping. Pass a starting integer ID to the start() method (typically the ID of a certificate you already know of); the script then walks through neighbouring IDs and downloads each certificate it finds. By default it decrements the ID after every attempt; edit the _modifyId() function to increment instead. Downloading stops after 100 certificates; this limit can be changed via the `limit` property.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const request = require('request'); | |
| const cheerio = require('cheerio'); | |
| const http = require('https'); | |
| const fs = require('fs'); | |
/**
 * Scrapes certificate images from proprofs.com, one certificate ID at a time.
 *
 * Starting from the ID passed to `start()`, the downloader fetches the
 * certificate page, locates the `img#certImage` element, saves the image as
 * `<id>.jpg` in the current working directory, and then moves on to the next
 * ID (see `_modifyId()`). It stops once `limit` images have been downloaded
 * or when the certificate page cannot be requested at all.
 */
class CertificateDownloader {
  constructor() {
    // Maximum number of successful downloads before the run stops.
    this.limit = 100;
    // Number of images successfully written to disk so far.
    this.downloadCount = 0;
    // Number of IDs skipped because their image file already existed.
    this.skippedImageCount = 0;
    // Extension appended to the ID to form the local file name.
    this.imageExtension = '.jpg';
    // ID currently being processed; set by start().
    this.currentId = -1;
    // Base URL that page and image URLs are resolved against.
    this.rootUrl = 'https://www.proprofs.com/';
  }

  /** Moves to the next candidate ID. Change to `++` to walk upwards instead. */
  _modifyId() {
    this.currentId--;
  }

  /** @returns {string} Local file name for the current ID. */
  _fileName() {
    return `${this.currentId}${this.imageExtension}`;
  }

  /** @returns {boolean} Whether the image for the current ID is already on disk. */
  _doesFileExist() {
    return fs.existsSync(this._fileName());
  }

  /**
   * Records that the current ID is being skipped and advances to the next ID.
   * Does not restart the download itself; start() loops over this method so
   * that a long run of existing files cannot overflow the call stack.
   */
  _skipDownload() {
    console.log(`${this._fileName()} already exists, skipping...`);
    this._modifyId();
    this.skippedImageCount++;
  }

  /**
   * Builds the certificate page URL for the current ID.
   * Resolving against rootUrl avoids the double slash that plain string
   * concatenation produced.
   * @returns {string}
   */
  _pageUrl() {
    const pageUrl = new URL('quiz-school/usercertificate.php', this.rootUrl);
    pageUrl.searchParams.set('id', String(this.currentId));
    return pageUrl.href;
  }

  /**
   * Resolves an image `src` attribute (relative, root-relative or absolute)
   * against rootUrl.
   * @param {string} src - Value of the image's `src` attribute.
   * @returns {?string} Absolute https URL, or null when the value cannot be
   *   parsed or is not https (the `https` module cannot fetch other schemes).
   */
  _imageUrl(src) {
    try {
      const resolved = new URL(src, this.rootUrl);
      return resolved.protocol === 'https:' ? resolved.href : null;
    } catch (err) {
      return null;
    }
  }

  /** Advances to the next ID and continues the run. */
  _tryNext() {
    this._modifyId();
    this.start(this.currentId);
  }

  /**
   * Streams an image to disk.
   * @param {string} imageUrl - Absolute https URL of the image.
   * @param {string} fileName - Destination path.
   * @param {function(?Error): void} onDone - Called exactly once: with null
   *   after the file has been completely written, or with the error otherwise.
   */
  _downloadImage(imageUrl, fileName, onDone) {
    let settled = false;
    const settle = (err) => {
      if (settled) return;
      settled = true;
      if (!err) {
        onDone(null);
        return;
      }
      // Remove any partial file; otherwise a later run would treat this ID as
      // already downloaded. A failure to unlink (e.g. file never created) is
      // deliberately ignored - the download error is what gets reported.
      fs.unlink(fileName, () => onDone(err));
    };

    http.get(imageUrl, (imageResponse) => {
      if (imageResponse.statusCode !== 200) {
        imageResponse.resume(); // drain the body so the socket is released
        settle(new Error(`image request returned status ${imageResponse.statusCode}`));
        return;
      }
      // Only create the file once there is real image data to put in it.
      const file = fs.createWriteStream(fileName);
      file.on('finish', () => settle(null));
      file.on('error', settle);
      imageResponse.on('error', (err) => {
        file.destroy();
        settle(err);
      });
      imageResponse.pipe(file);
    }).on('error', settle);
  }

  /**
   * Downloads the certificate image for `idToStartWith` and then keeps going
   * with subsequent IDs until `limit` downloads have completed.
   * @param {number} idToStartWith - Certificate ID to begin with.
   * @throws {Error} If `idToStartWith` is not a number.
   */
  start(idToStartWith) {
    if (typeof idToStartWith !== 'number') throw new Error('idToStartWith is required, and to be a number');
    this.currentId = idToStartWith;

    // Iterative rather than recursive so thousands of existing files are fine.
    while (this._doesFileExist()) {
      this._skipDownload();
    }

    if (this.downloadCount >= this.limit) {
      console.log('Limit met, quitting.');
      return;
    }

    console.log(`Getting html for ${this.currentId}, download count ${this.downloadCount}, limit: ${this.limit}, skipped images: ${this.skippedImageCount}...`);
    request(this._pageUrl(), (err, response, body) => {
      if (err || !response) {
        // A transport-level failure is unlikely to fix itself for the next
        // ID, so stop instead of looping on a dead connection.
        const reason = err ? err.message : 'no response received';
        console.log(`Request for ${this.currentId} failed (${reason}), quitting.`);
        return;
      }
      if (response.statusCode === 404 || response.statusCode === 500) {
        console.log(`Got a status code of: ${response.statusCode}. Trying next...`);
        this._tryNext();
        return;
      }

      const $ = cheerio.load(body);
      const img = $('body').find('img#certImage');
      const src = img.length > 0 ? img[0].attribs.src : undefined;
      const imageUrl = src ? this._imageUrl(src) : null;
      if (imageUrl === null) {
        console.log(`No downloadable certificate image found for ${this.currentId}. Trying next...`);
        this._tryNext();
        return;
      }

      console.log('Getting image...');
      this._downloadImage(imageUrl, this._fileName(), (downloadErr) => {
        if (downloadErr) {
          console.log(`Failed to download image for ${this.currentId}: ${downloadErr.message}. Trying next...`);
        } else {
          console.log('Done!');
          this.downloadCount++;
        }
        this._tryNext();
      });
    });
  }
}
// Script entry point: begin scraping at a known certificate ID.
const downloader = new CertificateDownloader();

downloader.start(152895196);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment