Created
February 22, 2021 08:47
-
-
Save vinhjaxt/17839695f2012a4032af4ca4c193fa35 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
const { URL } = require('url') | |
const fs = require('fs') | |
const path = require('path') | |
const noop = () => {} | |
const puppeteer = require('puppeteer-core') | |
/**
 * Build the local-time stamp that prefixes the process-level error logs.
 * Format is `D/M/YYYY H:M:S.ms`; fields are not zero-padded.
 *
 * @param {Date} [d] - Moment to format; defaults to the current time.
 * @returns {string} The formatted time stamp.
 */
function formatTimestamp (d = new Date()) {
  return `${d.getDate()}/${d.getMonth() + 1}/${d.getFullYear()} ${d.getHours()}:${d.getMinutes()}:${d.getSeconds()}.${d.getMilliseconds()}`
}

// Last-resort handlers: log the failure with a time stamp instead of letting
// the process die, so one bad response does not abort the whole capture.
// Both handlers write to stderr so diagnostics stay out of the normal
// stdout listing of saved resources.
process.on('uncaughtException', e => {
  console.error(formatTimestamp() + ': ', e)
}).on('unhandledRejection', reason => {
  console.error(formatTimestamp() + ' unCatchedPromise: ', reason)
})
/**
 * Mirror the resources loaded by a page to the local disk.
 *
 * Connects to an already-running Chrome through its remote debugging
 * endpoint, opens `url` in a new tab and writes the body of every response
 * that tab receives to `<outputFolder>/<hostname>/<pathname>`, or to
 * `output-<hostname>/<pathname>` (relative to the working directory) when no
 * output folder is given. Paths ending in `/` are stored as `index.html`.
 * Only the URL's pathname is used, so responses that differ only in their
 * query string overwrite each other.
 *
 * The tab is left open after navigation. Failures are logged, never thrown.
 *
 * @param {string} url - Page to open in the new tab.
 * @param {string} remoteURL - HTTP address of Chrome's remote debugging endpoint.
 * @param {boolean} [isJsOnly] - When truthy, only the resource types listed in
 *   `jsOnlyTypes` below are saved; everything else is skipped.
 * @param {string} [outputFolder] - Root folder for the saved files.
 * @returns {void}
 */
function download (url, remoteURL, isJsOnly, outputFolder) {
  // Resource types that are still saved when isJsOnly is set.
  const jsOnlyTypes = ['document', 'script', 'xhr', 'fetch', 'eventsource', 'manifest', 'other', 'websocket', 'signedexchange']
  puppeteer.connect({
    browserURL: remoteURL,
    defaultViewport: null
  }).then(async browser => {
    try {
      const page = await browser.newPage()
      page.once('close', () => {
        console.log('====================================')
        console.log('Done')
      })
      page.on('response', r => {
        const responseURL = r.url()
        if (responseURL.startsWith('chrome-extension://')) return
        const parsedURL = new URL(responseURL)
        const resourceType = r.request().resourceType()
        // Compare against the response's own resource type.
        if (isJsOnly && !jsOnlyTypes.includes(resourceType)) return
        console.log(resourceType, responseURL)
        // Hosts without a dot are not mirrored.
        if (!parsedURL.hostname.includes('.')) return
        let filePath = path.resolve(outputFolder ? `${outputFolder}/${parsedURL.hostname}/${parsedURL.pathname}` : `output-${parsedURL.hostname}/${parsedURL.pathname}`)
        if (!parsedURL.pathname || parsedURL.pathname.endsWith('/')) {
          // path.resolve() dropped the trailing slash; store directory URLs as index.html.
          filePath = `${filePath}/index.html`
        }
        console.log('OUT: ', filePath)
        r.buffer().then(buff => {
          mkdirp(path.dirname(filePath))
          fs.writeFile(filePath, buff, err => {
            if (err) console.error('Cannot write', filePath, err)
          })
        }).catch(err => {
          // The body may be unavailable, or the directory could not be created.
          console.error('Cannot save', responseURL, err)
        })
      })
      try {
        // https://stackoverflow.com/questions/53039551/selenium-webdriver-modifying-navigator-webdriver-flag-to-prevent-selenium-detec
        await page.evaluateOnNewDocument(`Object.defineProperty(navigator, 'webdriver', { get: () => undefined })`)
      } catch (e) {
        page.close().catch(closeErr => console.error(closeErr))
        throw e
      }
      // Awaited so that a navigation failure is reported by the catch below.
      await page.goto(url)
    } catch (e) {
      console.error(e)
    }
  }).catch(e => {
    console.error(e)
    // Vietnamese hint: "Have you started Chrome with --remote-debugging-port=9222?"
    console.error('Bật chrome với option --remote-debugging-port=9222 chưa?')
  })
}
// Command-line entry point: parse the arguments, normalise the address of
// Chrome's remote debugging endpoint and start the download.
//
//   -js, --js            save only script-like resources
//   -o,  --output DIR    root folder for the saved files
//   -a,  --addr ADDR     debugging endpoint (default 127.0.0.1:9222)
//   any other argument   the page URL (the last one wins)
!(() => {
  // Print the usage line and terminate with a failure status.
  const usage = () => {
    console.error('Usage: node', process.argv[1], '[-js] [-o output-folder] [--addr http://127.0.0.1:9222] https://site_url_to_save')
    process.exit(1)
  }
  let url, isJsOnly, outputFolder
  let remoteURL = '127.0.0.1:9222'
  for (let i = 2; i < process.argv.length; i++) {
    const argv = process.argv[i]
    switch (argv) {
      case '-js':
      case '--js': {
        isJsOnly = true
        break
      }
      case '-o':
      case '--output': {
        outputFolder = process.argv[++i]
        // The option was the last argument: its value is missing.
        if (outputFolder === undefined) return usage()
        break
      }
      case '-a':
      case '--addr': {
        remoteURL = process.argv[++i]
        // Without this guard the scheme check below would throw on undefined.
        if (remoteURL === undefined) return usage()
        break
      }
      default:
        url = argv
    }
  }
  if (!url) return usage()
  // Leave explicit http:// and https:// addresses alone; otherwise default to
  // plain http ("//host:port" and "host:port" are both accepted).
  if (!/^https?:\/\//i.test(remoteURL)) {
    remoteURL = remoteURL.startsWith('//') ? 'http:' + remoteURL : 'http://' + remoteURL
  }
  download(url, remoteURL, isJsOnly, outputFolder)
})()
// mkdirp | |
// const path = require('path') | |
// const fs = require('fs') | |
const { resourceUsage } = require('process') | |
// Permission bits requested for new directories before the umask is applied.
const _0777 = 0o777

/**
 * Synchronously create directory `p` together with any missing ancestors.
 *
 * If a non-directory entry already exists at a path that has to become a
 * directory, it is first renamed to `<path>.index` (best effort) so the
 * directory can take its place.
 *
 * @param {string} p - Directory to create; resolved against the working directory.
 * @param {object|number} [opts] - Either a mode, or an object with optional
 *   `mode` (permission bits, default `0777 & ~umask`) and `fs` (an object
 *   providing `statSync`, `mkdirSync` and `renameSync`, default Node's `fs`).
 * @param {string|null} [made] - Used by the recursion: first directory created so far.
 * @returns {string|null} `p` when it already was a directory; otherwise the
 *   first (outermost) directory that had to be created, or `made`/null when
 *   nothing was created.
 * @throws {Error} The original `mkdirSync` error when the directory cannot be
 *   created and does not exist afterwards.
 */
function mkdirp (p, opts, made) {
  if (!opts || typeof opts !== 'object') {
    opts = { mode: opts }
  }
  let mode = opts.mode
  const xfs = opts.fs || fs
  let stat
  if (mode === undefined) {
    mode = _0777 & (~process.umask())
  }
  if (!made) made = null
  p = path.resolve(p)
  try {
    try {
      if ((stat = xfs.statSync(p))) {
        if (stat.isDirectory()) {
          return p
        }
        // Something that is not a directory is in the way: move it aside.
        xfs.renameSync(p, p + '.index')
      }
    } catch (e) {
      // Deliberately ignored: a missing path is the normal case, and a failed
      // rename surfaces through the mkdirSync call below.
    }
    xfs.mkdirSync(p, mode)
    made = made || p
  } catch (err0) {
    switch (err0.code) {
      case 'ENOENT': {
        const parent = path.dirname(p)
        // At the filesystem root there is no parent left to create;
        // recursing on it would never terminate.
        if (parent === p) throw err0
        made = mkdirp(parent, opts, made)
        mkdirp(p, opts, made)
        break
      }
      // In the case of any other error, just see if there's a dir
      // there already. If so, then hooray! If not, then something
      // is borked.
      default:
        try {
          stat = xfs.statSync(p)
        } catch (err1) {
          throw err0
        }
        if (!stat.isDirectory()) throw err0
        break
    }
  }
  return made
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment