Created: June 22, 2021 13:11
Save teidesu/050b29430bd66b85229fe00ea8872dc8 to your computer and use it in GitHub Desktop.
Simple script to download galleries from exhentai.org w/out GPs, H@H or torrents.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /* | |
| Simple script to download galleries from exhentai.org w/out GPs, H@H or torrents | |
| Requires node >= 10 to run (uses fs.promises) | |
| Dependencies: | |
| yarn add cheerio node-fetch mime-types | |
| # or | |
| npm install cheerio node-fetch mime-types | |
| Also if you want proxies you will need http-proxy-agent for http proxies | |
| and socks-proxy-agent for socks proxies | |
| (c) teidesu 2020. This script is licensed under GPLv3 | |
| */ | |
| const cheerio = require('cheerio') | |
| const fetch = require('node-fetch') | |
| const fs = require('fs') | |
| const path = require('path') | |
| const mime = require('mime-types') | |
// Session cookies for exhentai.org — paste your own; required for access.
const cookie = 'PUT YOUR COOKIES HERE'
// Desktop-browser UA so the site serves its normal HTML layout.
const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
// Headers sent with every request.
const headers = {
  cookie,
  'user-agent': userAgent,
  accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
}
// Optional proxy agent; assigned in main() when --proxy/--socks is given.
let httpAgent = undefined
// Print the error and abort the whole process.
const die = (err) => {
  console.error(err)
  process.exit(1)
}
// Prefixed info logger.
const log = (...args) => console.log('[i]', ...args)
// Progress state: images downloaded so far, total images to download,
// and zero-pad width for output file names (set in downloadGallery).
let current = 0
let total = 0
let padSize = 0
/**
 * Build a string of `char` repeated `size` times.
 * Uses the built-in String.prototype.repeat instead of a manual
 * array-join loop; non-positive sizes yield '' (repeat would throw
 * on negatives, and the original loop produced '' there too).
 *
 * @param {string} char  string to repeat
 * @param {number} size  repetition count
 * @returns {string}
 */
function strFill (char, size) {
  if (size <= 0) return ''
  return char.repeat(size)
}
/**
 * Left-pad `text` with `char` up to `length` characters.
 * Uses the built-in String.prototype.padStart, which also caps the
 * result at `length` when `char` is longer than one character
 * (the original strFill-based version could overshoot there).
 * Callers in this script only pass single-character pads ('0', ' ').
 *
 * @param {string} text    string to pad
 * @param {number} length  target minimum length
 * @param {string} [char=' '] pad character
 * @returns {string}
 */
function padLeft (text, length, char = ' ') {
  return text.padStart(length, char)
}
// Draw the progress bar sized to the terminal width, followed by a
// "current/total" counter; the trailing \r keeps the cursor on the
// same line so the next render overwrites it.
function renderProgress () {
  const fraction = current / total
  const counterWidth = total.toString(10).length
  const barWidth = process.stdout.columns - counterWidth * 2 - 6
  const filled = Math.round(fraction * barWidth)
  const bar = strFill('#', filled) + strFill('-', barWidth - filled)
  const counter = `${padLeft(current.toString(10), counterWidth)}/${total}`
  process.stdout.write(`[${bar}] ${counter}\r`)
}
// Blank out the current terminal line and return the cursor to column 0.
function clearProgress () {
  const blanks = strFill(' ', process.stdout.columns)
  process.stdout.write(`${blanks}\r`)
}
/**
 * Strip characters that are illegal or reserved in file names
 * (cross-platform superset of POSIX and Windows rules).
 * Adapted from node-sanitize-filename (WTFPL & ISC).
 *
 * @param {string} str  candidate file name
 * @param {string} [replacement=''] what to substitute for bad spans
 * @returns {string}
 */
function sanitizeFilename (str, replacement = '') {
  const patterns = [
    /[\/\?<>\\:\*\|"]/g,                              // characters illegal on Windows/POSIX
    /[\x00-\x1f\x80-\x9f]/g,                          // control characters
    /^\.+$/,                                          // names made only of dots
    /^(con|prn|aux|nul|com[0-9]|lpt[0-9])(\..*)?$/i,  // reserved Windows device names
    /[\. ]+$/,                                        // trailing dots/spaces
  ]
  let result = str
  for (const re of patterns) {
    result = result.replace(re, replacement)
  }
  return result
}
/**
 * Parse a ranges spec like "1,5-10,20-*" into a lookup structure:
 *   { min, max, index: {imageN: true}, count, null }
 * `null: true` means no spec was given, so all of 1..total is selected.
 * Dies on malformed or negative entries.
 *
 * @param {?string} str    comma-separated ranges, or falsy for "everything"
 * @param {number}  total  total image count ("*" expands to this)
 */
function parseRanges (str, total) {
  const result = {
    min: Infinity,
    max: 0,
    index: {},
    count: 0,
    null: false
  }
  if (!str) {
    // No spec: select every image from 1 to total.
    result.min = 1
    result.max = total
    for (let i = 1; i <= total; i++) result.index[i] = true
    result.count = total
    result.null = true
    return result
  }
  for (const item of str.split(',')) {
    if (item.includes('-')) {
      const parts = item.split('-')
      let from = parseInt(parts[0])
      let to = parts[1] === '*' ? total : parseInt(parts[1])
      if (isNaN(to) || isNaN(from)) {
        die('Range parse failed at ' + item + ': invalid number')
      }
      if (from > to) {
        // Reversed range: normalize so from <= to.
        const swap = to
        to = from
        from = swap
      }
      if (from < 0) {
        die('Range parse failed at ' + item + ': negative from')
      }
      if (from < result.min) result.min = from
      if (to > result.max) result.max = to
      for (let i = from; i <= to; i++) {
        result.index[i] = true
      }
    } else {
      const n = parseInt(item)
      if (isNaN(n)) {
        die('Range parse failed at ' + n + ': invalid number')
      }
      if (n < 0) {
        die('Range parse failed at ' + n + ': negative image')
      }
      if (n < result.min) result.min = n
      if (n > result.max) result.max = n
      result.index[n] = true
    }
  }
  result.count = Object.keys(result.index).length
  return result
}
/**
 * Fetch page `n` of a gallery (0-based; page 0 uses the bare URL,
 * later pages append ?p=n and send the gallery URL as referer) and
 * return it parsed as a cheerio document. Dies when the site reports
 * an incorrect gallery key.
 */
async function loadPage (url, n = 0) {
  const pageUrl = n > 0 ? `${url}?p=${n}` : url
  const requestHeaders = { ...headers }
  if (n > 0) requestHeaders.referer = url
  const html = await fetch(pageUrl, {
    headers: requestHeaders,
    agent: httpAgent
  }).then(res => res.text()).catch(die)
  if (html.match(/incorrect key provided/i)) die(new Error('Invalid URL: ' + html))
  return cheerio.load(html)
}
/**
 * Download a single image given its viewer page (as a cheerio doc).
 * Prefers the "Download original" link (#i7.if a) and falls back to
 * the inline preview image. Dies when the bandwidth limit is hit
 * (509.gif preview, or an HTML response where an image was expected).
 *
 * @param {number|string} imageN  image number, used for the file name
 * @param {string} url   viewer-page URL, sent as the referer
 * @param {Function} $   cheerio document of the viewer page
 * @param {string} dest  destination directory
 * @returns {Promise<void>} resolves once the file is fully written
 */
async function downloadImage (imageN, url, $, dest) {
  let fullImgUrl = $('#i7.if a').attr('href')
  const lowResImgUrl = $('img#img').attr('src')
  if (lowResImgUrl.match(/509\.gif/i)) {
    clearProgress()
    die('Looks like a limit. Change ip or wait. Failed to download image ' + imageN)
  }
  if (!fullImgUrl) {
    fullImgUrl = lowResImgUrl
  }
  // FIX: the original wrapped fetch in `new Promise` without wiring up
  // a .catch, so a network failure left the promise pending forever and
  // the download silently hung. async/await propagates the rejection.
  const res = await fetch(fullImgUrl, {
    headers: {
      ...headers,
      referer: url
    },
    agent: httpAgent
  })
  // Pick the file extension: content-disposition filename first,
  // then content-type mapping, then a generic .bin.
  const m = (res.headers.get('content-disposition') || '').match(/filename=.*\.(.+)$/)
  const ext = m ? m[1] : res.headers.get('content-type')
    ? mime.extension(res.headers.get('content-type'))
    : 'bin'
  if (ext === 'html') {
    clearProgress()
    die('Looks like a limit. Change ip or wait. Failed to download image ' + imageN)
  }
  const target = path.join(dest, padLeft(imageN + '', padSize, '0') + '.' + ext)
  // Stream the body to disk; resolve only when the write is flushed.
  await new Promise((resolve, reject) => {
    const s = fs.createWriteStream(target)
    res.body.pipe(s)
    s.on('finish', resolve)
    s.on('error', reject)
  })
}
/**
 * Download every image on one thumbnail page (0-based `n`) that falls
 * inside `ranges`, updating the progress bar as each image completes.
 */
async function downloadPage ($, n, dest, ranges) {
  clearProgress()
  log('Downloading page ' + (n + 1))
  renderProgress()
  const links = $('.gdtl a').toArray().map(el => el.attribs.href)
  for (const link of links) {
    // Image number is the last dash-separated token of the viewer URL.
    const imageN = link.split('-').pop()
    if (!ranges.index[imageN]) continue
    const $viewer = await loadPage(link)
    await downloadImage(imageN, link, $viewer, dest)
    current++
    renderProgress()
  }
}
/**
 * Download an entire gallery.
 *
 * @param {string} url gallery URL
 * @param {?string} dest destination directory; defaults to the
 *        sanitized gallery title
 * @param {?string} ranges raw ranges spec (see parseRanges), or null
 *        for everything
 */
async function downloadGallery (url, dest = null, ranges = null) {
  log('Target directory is ' + dest)
  log('Loading first page')
  let $ = await loadPage(url)
  let numPages = parseInt($('.ptt td:nth-last-child(2)').text())
  // ".gpc" text looks like "Showing 1 - 20 of 123 images".
  let m = $('.gpc').text().match(/- ([0-9,]+) of ([0-9,]+)/)
  let perPage = parseInt(m[1].replace(/,/g, ''))
  total = parseInt(m[2].replace(/,/g, ''))
  padSize = m[2].length
  ranges = parseRanges(ranges, total)
  if (!dest) {
    dest = sanitizeFilename($('h1#gn').text())
    log('Target directory is ' + dest)
  }
  await fs.promises.mkdir(dest, { recursive: true })
  log(`Found ${numPages} pages with a total of ${total} images (${perPage} per page)`)
  if (!ranges.null) {
    log(`Based on ranges: ${ranges.min}-${ranges.max} will be parsed, with a total of ${ranges.count} images downloaded`)
  }
  total = ranges.count
  log('Starting download')
  renderProgress()
  // Page p (0-based) holds images p*perPage+1 .. (p+1)*perPage, so the
  // first needed page is floor((min-1)/perPage) and the loop must run
  // up to (exclusive) ceil(max/perPage).
  let startPage = Math.floor((ranges.min - 1) / perPage)
  // FIX: was Math.ceil((ranges.max - 1) / perPage), which skipped the
  // final page whenever (max - 1) was an exact multiple of perPage —
  // e.g. max=21, perPage=20 never fetched page 1, losing image 21.
  let endPage = Math.ceil(ranges.max / perPage)
  if (startPage === 0) {
    // Reuse the already-loaded first page.
    await downloadPage($, 0, dest, ranges)
    startPage = 1
  }
  for (let i = startPage; i < endPage; i++) {
    await loadPage(url, i).then(_ => downloadPage(_, i, dest, ranges))
  }
  clearProgress()
  log('Download finished!')
}
/**
 * CLI entry point: parse argv, print usage when needed, configure the
 * optional proxy agent, then kick off the gallery download.
 */
async function main () {
  const url = process.argv[2]
  const params = {
    dest: undefined,
    proxy: undefined,
    ranges: undefined,
    help: undefined,
    socks: undefined
  }
  // Each recognized "--name" flag consumes the following argv entry
  // as its value; unknown flags and stray arguments are ignored.
  for (let i = 3; i < process.argv.length; i++) {
    const arg = process.argv[i]
    if (arg.startsWith('--')) {
      const key = arg.substr(2)
      if (key in params) {
        params[key] = process.argv[++i]
      }
    }
  }
  if (!url || params.help) {
    console.log('Usage: ' + process.argv[1] + ' <url> [params]')
    console.log('After url you can use params:')
    console.log(' --dest ./dest-folder sets destination folder. default is auto-name by gallery name')
    console.log(' --proxy http://127.0.0.1:1234 sets http proxy to use')
    console.log(' --socks socks://127.0.0.1:1234 sets socks proxy to use')
    console.log(' --ranges 1,2,50-150 sets download ranges (see below)')
    console.log('')
    console.log('Ranges format: delimited by comma (,) and each item should be either:')
    console.log(' range: 1-100 (first may be bigger than second)')
    console.log(' number: 3')
    console.log(' range without end: 100-* (* will be replaced with total img number)')
    console.log('Image counting starts from 1.')
    return
  }
  if (params.proxy) {
    const HttpProxyAgent = require('http-proxy-agent')
    httpAgent = new HttpProxyAgent(params.proxy)
  }
  if (params.socks) {
    const SocksProxyAgent = require('socks-proxy-agent')
    httpAgent = new SocksProxyAgent(params.socks)
  }
  return downloadGallery(url, params.dest, params.ranges)
}
// Run the CLI only when this file is executed directly; any rejection
// from main() is routed to die() so the process exits non-zero.
if (require.main === module) {
  main().catch(die)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.