Skip to content

Instantly share code, notes, and snippets.

@teidesu
Created June 22, 2021 13:11
Show Gist options
  • Select an option

  • Save teidesu/050b29430bd66b85229fe00ea8872dc8 to your computer and use it in GitHub Desktop.

Select an option

Save teidesu/050b29430bd66b85229fe00ea8872dc8 to your computer and use it in GitHub Desktop.
Simple script to download galleries from exhentai.org w/out GPs, H@H or torrents.
#!/usr/bin/env node
/*
Simple script to download galleries from exhentai.org w/out GPs, H@H or torrents
Requires node >= 10 to run (uses fs.promises)
Dependencies:
yarn add cheerio node-fetch mime-types
# or
npm install cheerio node-fetch mime-types
Also if you want proxies you will need http-proxy-agent for http proxies
and socks-proxy-agent for socks proxies
(c) teidesu 2020. This script is licensed under GPLv3
*/
const cheerio = require('cheerio')
const fetch = require('node-fetch')
const fs = require('fs')
const path = require('path')
const mime = require('mime-types')
// Paste your exhentai.org session cookies here — the site requires a
// logged-in session to serve gallery pages.
const cookie = 'PUT YOUR COOKIES HERE'
// Desktop Chrome user-agent so requests look like a regular browser.
const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
// Default headers sent with every request.
const headers = {
cookie,
'user-agent': userAgent,
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
}
// Optional proxy agent; assigned in main() when --proxy/--socks is passed.
let httpAgent = undefined
// Print the error and terminate the whole process with a non-zero exit code.
const die = (err) => {
console.error(err)
process.exit(1)
}
// Prefixed info logger.
const log = (...args) => console.log('[i]', ...args)
// Progress-bar state: images downloaded so far, total selected for download,
// and zero-pad width used for output file names.
let current = 0
let total = 0
let padSize = 0
/**
 * Build a string of `char` repeated `size` times.
 * Matches the original loop semantics: non-positive (or non-finite) sizes
 * yield an empty string, and a fractional positive size rounds up (the
 * `i < size` loop ran ceil(size) times).
 * @param {string} char - string to repeat (typically a single character)
 * @param {number} size - repetition count
 * @returns {string}
 */
function strFill (char, size) {
  // String#repeat is the idiomatic stdlib replacement for the manual
  // array-fill-and-join loop; guard negatives, which would make it throw.
  return size > 0 ? char.repeat(Math.ceil(size)) : ''
}
/**
 * Left-pad `text` with `char` until it is at least `length` characters.
 * @param {string} text - string to pad
 * @param {number} length - target minimum length
 * @param {string} [char=' '] - pad character
 * @returns {string} padded string (unchanged if already long enough)
 */
function padLeft (text, length, char = ' ') {
  // String#padStart is the exact stdlib equivalent of the hand-rolled
  // fill-and-prepend logic (it is a no-op when text.length >= length).
  return text.padStart(length, char)
}
// Repaint the "[####----] current/total" progress bar on the current
// terminal line. Ends with \r (no newline) so the next call overdraws it.
// Reads the module-level `current` / `total` counters.
function renderProgress () {
  const counterWidth = total.toString(10).length
  // Bar width = terminal width minus the "NNN/NNN" counter and decoration.
  const barWidth = process.stdout.columns - counterWidth * 2 - 6
  const filled = Math.round((current / total) * barWidth)
  const bar = strFill('#', filled) + strFill('-', barWidth - filled)
  const counter = padLeft(current.toString(10), counterWidth) + '/' + total
  process.stdout.write('[' + bar + '] ' + counter + '\r')
}
// Blank out the progress-bar line with spaces and return the cursor to
// column 0, so the next log line starts clean.
function clearProgress () {
  const blank = strFill(' ', process.stdout.columns)
  process.stdout.write(`${blank}\r`)
}
/**
 * Strip characters that are not allowed in file names (Windows + POSIX
 * rules). Adapted from node-sanitize-filename (WTFPL & ISC).
 * @param {string} str - candidate file name
 * @param {string} [replacement=''] - what to substitute for illegal parts
 * @returns {string} sanitized name
 */
function sanitizeFilename (str, replacement = '') {
  const patterns = [
    /[\/\?<>\\:\*\|"]/g,                              // reserved punctuation
    /[\x00-\x1f\x80-\x9f]/g,                          // control characters
    /^\.+$/,                                          // "." / ".." style names
    /^(con|prn|aux|nul|com[0-9]|lpt[0-9])(\..*)?$/i,  // Windows device names
    /[\. ]+$/                                         // trailing dots/spaces
  ]
  // Apply each substitution in the same order as the original chain.
  let out = str
  for (const re of patterns) out = out.replace(re, replacement)
  return out
}
/**
 * Parse a ranges string such as "1,5-10,20-*" into a selection structure.
 * @param {?string} str - raw ranges string; falsy means "everything"
 * @param {number} total - total number of images ('*' expands to this)
 * @returns {{min: number, max: number, index: Object, count: number, null: boolean}}
 *   `index` maps each selected image number to true; `null` is true when no
 *   ranges string was supplied. Calls die() on malformed input.
 */
function parseRanges (str, total) {
  const result = { min: Infinity, max: 0, index: {}, count: 0, null: false }
  if (!str) {
    // No ranges given: select every image from 1 to total.
    result.min = 1
    result.max = total
    for (let i = 1; i <= total; i++) result.index[i] = true
    result.count = total
    result.null = true
    return result
  }
  for (let item of str.split(',')) {
    if (item.includes('-')) {
      let [lo, hi] = item.split('-')
      // '*' means "until the last image".
      hi = hi === '*' ? total : parseInt(hi)
      lo = parseInt(lo)
      if (isNaN(hi) || isNaN(lo)) {
        die('Range parse failed at ' + item + ': invalid number')
      }
      // Reversed ranges (e.g. "7-5") are allowed; normalize the order.
      if (lo > hi) [lo, hi] = [hi, lo]
      if (lo < 0) {
        die('Range parse failed at ' + item + ': negative from')
      }
      if (lo < result.min) result.min = lo
      if (hi > result.max) result.max = hi
      for (let i = lo; i <= hi; i++) result.index[i] = true
    } else {
      item = parseInt(item)
      if (isNaN(item)) {
        die('Range parse failed at ' + item + ': invalid number')
      }
      if (item < 0) {
        die('Range parse failed at ' + item + ': negative image')
      }
      if (item < result.min) result.min = item
      if (item > result.max) result.max = item
      result.index[item] = true
    }
  }
  result.count = Object.keys(result.index).length
  return result
}
/**
 * Fetch a gallery/viewer page and return a cheerio handle for it.
 * @param {string} url - base page URL
 * @param {number} [n=0] - zero-based sub-page index; appended as ?p=n when > 0
 * @returns {Promise<CheerioStatic>} loaded document
 *   Dies on transport errors or when the site reports an invalid key.
 */
async function loadPage (url, n = 0) {
  const target = n > 0 ? url + '?p=' + n : url
  // Paginated requests pretend to come from the first page.
  const extra = n > 0 ? { referer: url } : {}
  const page = await fetch(target, {
    headers: { ...headers, ...extra },
    agent: httpAgent
  }).then(res => res.text()).catch(die)
  if (page.match(/incorrect key provided/i)) die(new Error('Invalid URL: ' + page))
  return cheerio.load(page)
}
/**
 * Download one gallery image into `dest`.
 * @param {string|number} imageN - image number (used for the file name)
 * @param {string} url - viewer page URL, sent as the referer
 * @param {CheerioStatic} $ - loaded viewer page
 * @param {string} dest - destination directory
 * @returns {Promise<void>} resolves when the file is fully written
 *   Dies when the bandwidth limit is hit (509 gif or an HTML response).
 */
async function downloadImage (imageN, url, $, dest) {
  let fullImgUrl = $('#i7.if a').attr('href')
  const lowResImgUrl = $('img#img').attr('src')
  // The 509 gif is what the site serves once the bandwidth limit is hit.
  if (lowResImgUrl.match(/509\.gif/i)) {
    clearProgress()
    die('Looks like a limit. Change ip or wait. Failed to download image ' + imageN)
  }
  // Fall back to the preview image when no "download original" link exists.
  if (!fullImgUrl) {
    fullImgUrl = lowResImgUrl
  }
  // BUG FIX: the original wrapped fetch in `new Promise` without handling
  // its rejection, so a network error left the promise hanging forever and
  // raised an unhandled rejection. async/await lets errors propagate.
  const res = await fetch(fullImgUrl, {
    headers: {
      ...headers,
      referer: url
    },
    agent: httpAgent
  })
  // Pick the file extension from content-disposition, then content-type.
  // BUG FIX: mime.extension() returns `false` for unknown types, which used
  // to produce files named "NN.false" — coalesce everything to 'bin'.
  const m = (res.headers.get('content-disposition') || '').match(/filename=.*\.(.+)$/)
  const contentType = res.headers.get('content-type')
  const ext = (m && m[1]) || (contentType && mime.extension(contentType)) || 'bin'
  if (ext === 'html') {
    // An HTML body instead of an image also means we hit the limit.
    clearProgress()
    die('Looks like a limit. Change ip or wait. Failed to download image ' + imageN)
  }
  const file = path.join(dest, padLeft(imageN + '', padSize, '0') + '.' + ext)
  // Stream the body to disk; reject on either stream failing so the caller
  // never waits on a dead download.
  await new Promise((resolve, reject) => {
    const out = fs.createWriteStream(file)
    res.body.pipe(out)
    res.body.on('error', reject)
    out.on('finish', resolve)
    out.on('error', reject)
  })
}
/**
 * Download every image on gallery result page `n` that falls inside
 * `ranges`, updating the module-level progress counters as it goes.
 * Images are fetched strictly one at a time (sequential awaits).
 * @param {CheerioStatic} $ - loaded gallery page
 * @param {number} n - zero-based page index (only used for logging)
 * @param {string} dest - destination directory
 * @param {Object} ranges - selection from parseRanges()
 */
async function downloadPage ($, n, dest, ranges) {
  clearProgress()
  log('Downloading page ' + (n + 1))
  renderProgress()
  const links = $('.gdtl a').toArray().map(el => el.attribs.href)
  for (const link of links) {
    // The image number is the last dash-separated part of the viewer URL.
    const imageN = link.split('-').pop()
    if (!ranges.index[imageN]) continue
    const $viewer = await loadPage(link)
    await downloadImage(imageN, link, $viewer, dest)
    current++
    renderProgress()
  }
}
/**
 * Download a whole gallery.
 * @param {string} url - gallery URL
 * @param {?string} [dest=null] - destination directory; defaults to the
 *   sanitized gallery title read from the page
 * @param {?string} [ranges=null] - raw ranges string (see parseRanges)
 */
async function downloadGallery (url, dest = null, ranges = null) {
  log('Target directory is ' + dest)
  log('Loading first page')
  const $ = await loadPage(url)
  const numPages = parseInt($('.ptt td:nth-last-child(2)').text())
  // ".gpc" reads like "Showing 1 - 20 of 1,234 images".
  const m = $('.gpc').text().match(/- ([0-9,]+) of ([0-9,]+)/)
  const perPage = parseInt(m[1].replace(/,/g, ''))
  total = parseInt(m[2].replace(/,/g, ''))
  // BUG FIX: pad width was taken from the raw match (m[2].length), so a
  // total like "1,234" padded names to 5 characters instead of 4; derive
  // it from the parsed number instead.
  padSize = total.toString(10).length
  ranges = parseRanges(ranges, total)
  if (!dest) {
    dest = sanitizeFilename($('h1#gn').text())
    log('Target directory is ' + dest)
  }
  await fs.promises.mkdir(dest, { recursive: true })
  log(`Found ${numPages} pages with a total of ${total} images (${perPage} per page)`)
  if (!ranges.null) {
    log(`Based on ranges: ${ranges.min}-${ranges.max} will be parsed, with a total of ${ranges.count} images downloaded`)
  }
  total = ranges.count
  log('Starting download')
  renderProgress()
  // Image N lives on zero-based page floor((N - 1) / perPage).
  let startPage = Math.floor((ranges.min - 1) / perPage)
  // BUG FIX: the old exclusive bound `Math.ceil((ranges.max - 1) / perPage)`
  // skipped the final page whenever ranges.max was the first image of a
  // page (e.g. perPage=20, --ranges 21 downloaded nothing). Use the page
  // index that actually holds ranges.max, plus one.
  const endPage = Math.floor((ranges.max - 1) / perPage) + 1
  if (startPage === 0) {
    // The first page is already loaded; reuse it instead of refetching.
    await downloadPage($, 0, dest, ranges)
    startPage = 1
  }
  for (let i = startPage; i < endPage; i++) {
    const $page = await loadPage(url, i)
    await downloadPage($page, i, dest, ranges)
  }
  clearProgress()
  log('Download finished!')
}
/**
 * CLI entry point: parse argv, set up an optional proxy agent, then start
 * the gallery download. Prints usage when no URL or --help is given.
 */
async function main () {
  const url = process.argv[2]
  // Recognized parameters; every flag except --help takes a value.
  const params = {
    dest: undefined,
    proxy: undefined,
    ranges: undefined,
    help: undefined,
    socks: undefined
  }
  for (let i = 3; i < process.argv.length; i++) {
    const arg = process.argv[i]
    if (!arg.startsWith('--')) continue
    const name = arg.slice(2)
    if (!(name in params)) continue
    // BUG FIX: --help used to consume the next argv entry as its "value",
    // so a trailing `--help` left params.help undefined (falsy) and the
    // usage text was never shown. Treat it as a boolean flag.
    params[name] = name === 'help' ? true : process.argv[++i]
  }
  if (!url || params.help) {
    console.log('Usage: ' + process.argv[1] + ' <url> [params]')
    console.log('After url you can use params:')
    console.log(' --dest ./dest-folder sets destination folder. default is auto-name by gallery name')
    console.log(' --proxy http://127.0.0.1:1234 sets http proxy to use')
    console.log(' --socks socks://127.0.0.1:1234 sets socks proxy to use')
    console.log(' --ranges 1,2,50-150 sets download ranges (see below)')
    console.log('')
    console.log('Ranges format: delimited by comma (,) and each item should be either:')
    console.log(' range: 1-100 (first may be bigger than second)')
    console.log(' number: 3')
    console.log(' range without end: 100-* (* will be replaced with total img number)')
    console.log('Image counting starts from 1.')
    return
  }
  // Proxy support is optional: the agent packages are only require()d when
  // the corresponding flag is actually used.
  if (params.proxy) {
    const HttpProxyAgent = require('http-proxy-agent')
    httpAgent = new HttpProxyAgent(params.proxy)
  }
  if (params.socks) {
    const SocksProxyAgent = require('socks-proxy-agent')
    httpAgent = new SocksProxyAgent(params.socks)
  }
  return downloadGallery(url, params.dest, params.ranges)
}
// Run only when executed directly (not when require()d as a module);
// die() prints any top-level error and exits with a non-zero code.
if (require.main === module) {
main().catch(die)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment