Skip to content

Instantly share code, notes, and snippets.

@vinhjaxt
Created February 22, 2021 08:47
Show Gist options
  • Save vinhjaxt/17839695f2012a4032af4ca4c193fa35 to your computer and use it in GitHub Desktop.
Save vinhjaxt/17839695f2012a4032af4ca4c193fa35 to your computer and use it in GitHub Desktop.
#!/usr/bin/env node
const { URL } = require('url')
const fs = require('fs')
const path = require('path')
const noop = () => {}
const puppeteer = require('puppeteer-core')
// Builds the "D/M/YYYY H:M:S.ms" prefix (local time, no zero padding) that
// both global error handlers put in front of their log lines.
function errorLogTimestamp () {
  const now = new Date()
  const datePart = [now.getDate(), now.getMonth() + 1, now.getFullYear()].join('/')
  const clockPart = [now.getHours(), now.getMinutes(), now.getSeconds()].join(':')
  return `${datePart} ${clockPart}.${now.getMilliseconds()}`
}

// Log otherwise-fatal errors with a timestamp; registering these listeners
// means an uncaught exception or unhandled rejection is reported rather than
// terminating the process through Node's default handling.
process.on('uncaughtException', (error) => {
  console.error(`${errorLogTimestamp()}: `, error)
})
process.on('unhandledRejection', (reason, promise) => {
  console.log(`${errorLogTimestamp()} unCatchedPromise: `, reason)
})
/**
 * Connects to an already-running browser through its remote-debugging
 * endpoint, opens a new page, navigates it to `url`, and writes the body of
 * every response the page receives to disk.
 *
 * Files are written to `<outputFolder>/<hostname>/<pathname>` when
 * `outputFolder` is given, otherwise to `output-<hostname>/<pathname>`
 * relative to the current working directory. Paths that are empty or end in
 * `/` are stored as `index.html` inside that directory.
 *
 * The function returns immediately; all work happens asynchronously and every
 * failure is logged rather than thrown.
 *
 * @param {string} url - Page to open.
 * @param {string} remoteURL - Browser debugging endpoint, passed to puppeteer as `browserURL`.
 * @param {boolean} [isJsOnly] - When truthy, only responses whose resource type is in the allow-list below are saved.
 * @param {string} [outputFolder] - Root directory for the saved files.
 * @returns {void}
 */
function download(url, remoteURL, isJsOnly, outputFolder) {
  // Resource types that are still saved when isJsOnly is set.
  const jsOnlyResourceTypes = ['document', 'script', 'xhr', 'fetch', 'eventsource', 'manifest', 'other', 'websocket', 'signedexchange']

  puppeteer.connect({
    browserURL: remoteURL,
    defaultViewport: null
  }).then(async browser => {
    try {
      const page = await browser.newPage()
      page.once('close', () => {
        console.log('====================================')
        console.log('Done')
      })
      page.on('response', r => {
        const url = r.url()
        if (url.startsWith('chrome-extension://')) return
        const parsedURL = new URL(url)
        const resourceType = r.request().resourceType()
        // The filter must test this response's own resource type.
        if (isJsOnly && !jsOnlyResourceTypes.includes(resourceType)) return
        console.log(resourceType, url)
        // Skip hosts without a dot (this also covers URLs with an empty hostname).
        if (!parsedURL.hostname.includes('.')) return
        let filePath = path.resolve(outputFolder ? `${outputFolder}/${parsedURL.hostname}/${parsedURL.pathname}` : `output-${parsedURL.hostname}/${parsedURL.pathname}`)
        if (!parsedURL.pathname || parsedURL.pathname[parsedURL.pathname.length - 1] === '/') {
          filePath = `${filePath}/index.html`
        }
        console.log('OUT: ', filePath)
        r.buffer().then(buff => {
          mkdirp(path.dirname(filePath))
          fs.writeFile(filePath, buff, err => {
            if (err) console.error('Write failed: ', filePath, err)
          })
        }).catch(e => {
          // A response body may be unavailable and mkdirp can throw; report
          // the failure for this one resource and keep capturing the rest.
          console.error('Save failed: ', url, e)
        })
      })
      try {
        // Hide navigator.webdriver so the page cannot use it to detect automation.
        // https://stackoverflow.com/questions/53039551/selenium-webdriver-modifying-navigator-webdriver-flag-to-prevent-selenium-detec
        await page.evaluateOnNewDocument(`Object.defineProperty(navigator, 'webdriver', { get: () => undefined })`)
      } catch (e) {
        page.close()
        throw e
      }
      // Awaited so that a navigation failure lands in the catch below.
      await page.goto(url)
    } catch (e) {
      console.error(e)
    }
  }).catch(e => {
    console.error(e)
    // The message below asks (in Vietnamese) whether Chrome was started with
    // the --remote-debugging-port=9222 option.
    console.error('Bật chrome với option --remote-debugging-port=9222 chưa?')
  })
}
// Command-line entry point: parses the arguments, normalises the debugging
// address into a URL with a scheme, and starts the download. Prints usage and
// exits with status 1 when no site URL is given or an option is missing its
// value.
!(() => {
  const args = process.argv
  const exitWithUsage = () => {
    console.error('Usage: node', args[1], '[-js] [-o output-folder] [--addr http://127.0.0.1:9222] https://site_url_to_save')
    process.exit(1)
  }

  let url, isJsOnly, outputFolder
  let remoteURL = '127.0.0.1:9222'
  for (let i = 2; i < args.length; i++) {
    const argv = args[i]
    switch (argv) {
      case '-js':
      case '--js': {
        isJsOnly = true
        break
      }
      case '-o':
      case '--output': {
        // The option needs a value; without one there is nothing to consume.
        if (i + 1 >= args.length) exitWithUsage()
        outputFolder = args[++i]
        break
      }
      case '-a':
      case '--addr': {
        if (i + 1 >= args.length) exitWithUsage()
        remoteURL = args[++i]
        break
      }
      default:
        // Any other argument is taken as the site URL; the last one wins.
        url = argv
    }
  }
  if (!url) exitWithUsage()

  // Leave addresses that already carry an http(s) scheme untouched; otherwise
  // default to plain http (accepting the scheme-relative "//host:port" form).
  if (!/^https?:\/\//i.test(remoteURL)) {
    remoteURL = remoteURL.startsWith('//') ? 'http:' + remoteURL : 'http://' + remoteURL
  }
  download(url, remoteURL, isJsOnly, outputFolder)
})()
// mkdirp
// const path = require('path')
// const fs = require('fs')
const { resourceUsage } = require('process')
// Widest permission bits; narrowed by the process umask when no mode is given.
const _0777 = 0o777

/**
 * Synchronously creates directory `p`, creating missing parents on the way.
 *
 * If something that is not a directory already exists at `p`, an attempt is
 * made to rename it to `p + '.index'` before the directory is created; a
 * failed rename is ignored and the subsequent mkdir decides the outcome.
 *
 * @param {string} p - Directory to create (resolved to an absolute path).
 * @param {object|*} [opts] - Either an options object with optional `mode`
 *   and `fs` (a replacement for the `fs` module), or a bare mode value.
 * @param {string|null} [made] - First directory created so far (used by the recursion).
 * @returns {string|null} `p` itself when it already was a directory,
 *   otherwise the first directory that had to be created (or the incoming
 *   `made`, normalised to null when falsy).
 * @throws The original mkdir error when the directory cannot be created and
 *   no directory exists at `p` afterwards.
 */
function mkdirp (p, opts, made) {
  const settings = (opts && typeof opts === 'object') ? opts : { mode: opts }
  const xfs = settings.fs || fs
  const mode = settings.mode === undefined
    ? _0777 & (~process.umask())
    : settings.mode
  let firstMade = made || null
  const target = path.resolve(p)

  try {
    try {
      const existing = xfs.statSync(target)
      if (existing) {
        if (existing.isDirectory()) {
          return target
        }
        // Move the blocking non-directory entry out of the way.
        xfs.renameSync(target, `${target}.index`)
      }
    } catch (e) { }
    xfs.mkdirSync(target, mode)
    firstMade = firstMade || target
  } catch (err0) {
    if (err0.code === 'ENOENT') {
      // A parent is missing: create it first, then retry this level.
      firstMade = mkdirp(path.dirname(target), settings, firstMade)
      mkdirp(target, settings, firstMade)
    } else {
      // For any other error, accept the situation only if a directory
      // exists at the target after all; otherwise surface the mkdir error.
      let found
      try {
        found = xfs.statSync(target)
      } catch (err1) {
        throw err0
      }
      if (!found.isDirectory()) throw err0
    }
  }
  return firstMade
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment