Created
December 3, 2023 17:22
-
-
Save advanceboy/5ad3f80eb31e1b26d3ecff2ee6d51487 to your computer and use it in GitHub Desktop.
ブラウザ操作中の通信の内容をローカルファイルに自動で保存する
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Drives Chrome through the Chrome DevTools Protocol (via puppeteer-core) and automatically saves the bodies of intercepted network responses to local files.
const puppeteer = require('puppeteer-core'); | |
const fs = require('fs/promises'); | |
const path = require('path'); | |
// Positional CLI arguments: everything after the node binary and the script path.
const args = process.argv.slice(2);

// Usage text appended to every argument-error message printed by this script.
const how2useMsg = " node intercept-requests.js outputDir [urlPattern] [matchRegExp]\n outputDir: Output Directory\n urlPattern: If specified, only URLs matched with wildcards will be fetched. see https://chromedevtools.github.io/devtools-protocol/tot/Fetch/#type-RequestPattern . It's better to specify this for performance reasons.\n matchRegExp: If specified, only URLs that match regular expressions will be saved.";

// The output directory is mandatory, so stop right away when nothing was passed.
if (args.length === 0) {
  console.log(`one or more arguments are required.\n\n${how2useMsg}`);
  process.exit(1);
}
/** Content types that map to one fixed file extension (a Map, so header text can never hit Object.prototype keys). */
const EXTENSION_BY_CONTENT_TYPE = new Map([
  ['text/plain', '.txt'],
  ['text/javascript', '.js'],
  ['text/css', '.css'],
  ['text/xml', '.xml'],
  ['text/html', '.html'],
  ['application/json', '.json'],
  ['application/zip', '.zip'],
]);

/**
 * Extracts the file name from a Content-Disposition header value.
 *
 * @param {string} value - Raw header value, e.g. `attachment; filename="a.txt"`.
 * @returns {string|null} The decoded file name, or null when the header carries none.
 */
function fileNameFromContentDisposition(value) {
  // Extended form `filename*=charset'lang'percent-encoded` is tried first.
  const extended = /filename\*\s*=\s*[^']*'[^']*'([^;]+)/i.exec(value);
  if (extended) {
    try {
      return decodeURIComponent(extended[1].trim());
    } catch {
      // Not valid percent-encoded UTF-8; fall back to the plain `filename=` parameter.
    }
  }
  const plain = /filename\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^;]+))/i.exec(value);
  if (!plain) {
    return null;
  }
  // Quoted form: undo backslash escapes. Token form: just trim.
  return plain[1] !== undefined ? plain[1].replace(/\\(.)/g, '$1') : plain[2].trim();
}

/**
 * Reduces an untrusted name to a single safe path segment.
 *
 * @param {string} name - Name taken from a response header or a URL.
 * @returns {string} The last path segment with reserved characters replaced by `_`;
 *   an empty string when nothing usable remains (including `.` and `..`).
 */
function sanitizeFileName(name) {
  // Treat both separator styles as directories so the result cannot leave the output directory.
  const base = path.posix.basename(name.replace(/\\/g, '/'));
  const cleaned = base.replace(/[<>:"|?*\u0000-\u001f]/g, '_');
  return cleaned === '.' || cleaned === '..' ? '' : cleaned;
}

/**
 * Picks a file extension for a content type.
 *
 * @param {string} contentType - Lower-cased media type without parameters.
 * @returns {string} Extension including the leading dot, or '' when unknown.
 */
function extensionForContentType(contentType) {
  const known = EXTENSION_BY_CONTENT_TYPE.get(contentType);
  if (known) {
    return known;
  }
  // image/<subtype>[+suffix] -> .<subtype>, e.g. image/svg+xml -> .svg
  const image = /^image\/(.*?)(?:\+.*)?$/.exec(contentType);
  return image ? `.${image[1]}` : '';
}

/**
 * Decides the file name under which a response is stored.
 *
 * The name comes from Content-Disposition when it has a filename, otherwise from
 * the last segment of the URL path, otherwise `index`. When the name has no
 * extension, one is derived from Content-Type if possible.
 *
 * @param {string} url - Request URL.
 * @param {Array<{name: string, value: string}>} [responseHeaders] - Response headers.
 * @returns {string} A non-empty, single-segment file name.
 */
function resolveFileName(url, responseHeaders = []) {
  let disposition = null;
  let contentType = null;
  for (const { name, value } of responseHeaders) {
    switch (name.toLowerCase()) {
      case 'content-disposition':
        disposition = value;
        break;
      case 'content-type':
        contentType = value.split(';')[0].trim().toLowerCase();
        break;
    }
  }
  const rawName =
    (disposition && fileNameFromContentDisposition(disposition)) ||
    new URL(url).pathname.split('/').pop();
  // An empty name would make path.join() return the output directory itself.
  let fileName = sanitizeFileName(rawName) || 'index';
  if (!path.extname(fileName) && contentType) {
    fileName += sanitizeFileName(extensionForContentType(contentType));
  }
  return fileName;
}

/**
 * Creates a new file, appending `~1`, `~2`, ... before the extension until an unused name is found.
 *
 * The file is opened with the exclusive-create flag, so two handlers running
 * concurrently cannot both end up with the same path.
 *
 * @param {string} basePath - Preferred path.
 * @returns {Promise<import('fs/promises').FileHandle>} Handle of the newly created file.
 */
async function openUniqueFile(basePath) {
  const ext = path.extname(basePath);
  const stem = basePath.slice(0, basePath.length - ext.length);
  for (let counter = 0; ; counter++) {
    const candidate = counter === 0 ? basePath : `${stem}~${counter}${ext}`;
    try {
      return await fs.open(candidate, 'wx');
    } catch (err) {
      if (err.code !== 'EEXIST') {
        throw err;
      }
    }
  }
}

/**
 * Fetches the body of a paused response and writes it into the output directory.
 *
 * @param {object} client - CDP session with the Fetch domain enabled.
 * @param {object} requestEvent - Payload of the `Fetch.requestPaused` event.
 * @param {string} outputDir - Directory receiving the file.
 * @returns {Promise<void>}
 */
async function saveResponseBody(client, requestEvent, outputDir) {
  const { request, requestId, responseHeaders } = requestEvent;
  // Fetch the body before creating the file so a failed fetch leaves no empty file behind.
  const response = await client.send('Fetch.getResponseBody', { requestId });
  const body = Buffer.from(response.body, response.base64Encoded ? 'base64' : 'utf-8');
  const file = await openUniqueFile(path.join(outputDir, resolveFileName(request.url, responseHeaders)));
  try {
    await file.writeFile(body);
  } finally {
    await file.close();
  }
}

// Entry point: validate the arguments, launch the browser, save matching responses
// until the first page is closed, then shut the browser down.
(async () => {
  const outputDir = args[0];
  let isDirectory = false;
  try {
    isDirectory = (await fs.stat(outputDir)).isDirectory();
  } catch {
    // Missing or unreadable path: reported by the check below.
  }
  if (!isDirectory) {
    console.log(`invalid directory: ${outputDir}\n\n${how2useMsg}`);
    process.exit(1);
  }

  const urlPattern = args.length >= 2 ? args[1] : '*';

  let matchRegExp = null;
  if (args.length >= 3) {
    try {
      matchRegExp = new RegExp(args[2]);
    } catch (err) {
      // Keep going without a filter, but tell the user why everything gets saved.
      console.warn(`ignoring invalid matchRegExp "${args[2]}": ${err.message}`);
    }
  }

  const browser = await puppeteer.launch({
    args: ['--guest'],
    channel: 'chrome',
    defaultViewport: null,
    headless: false,
    product: 'chrome'
  });
  // if use firefox
  // const browser = await puppeteer.launch({
  //   args: ['-private'],
  //   defaultViewport: null,
  //   executablePath: String.raw`C:\Program Files\Mozilla Firefox\firefox.exe`,
  //   headless: false,
  //   product: 'firefox'
  // });
  const page = (await browser.pages())[0];
  try {
    const client = await page.target().createCDPSession();
    // Pause every matching request at the response stage so its body can be read.
    await client.send('Fetch.enable', { 'patterns': [{ urlPattern, 'requestStage': 'Response' }] });
    client.on('Fetch.requestPaused', async (requestEvent) => {
      const { request, requestId, responseStatusCode } = requestEvent;
      try {
        // No status code means no response was received, so there is no body to save.
        if (responseStatusCode && (!matchRegExp || matchRegExp.test(request.url))) {
          await saveResponseBody(client, requestEvent, outputDir);
        }
      } catch (err) {
        console.error(`failed to save ${request.url}:`, err);
      } finally {
        // Release the paused request exactly once, whether or not saving worked,
        // so the page never hangs waiting for it.
        try {
          await client.send('Fetch.continueRequest', { requestId });
        } catch (err) {
          console.error(`failed to continue ${request.url}:`, err);
        }
      }
    });
    // wait for close
    await new Promise((resolve) => page.once('close', resolve));
  } finally {
    try {
      await Promise.all((await browser.pages()).map((p) => p.close()));
    } finally {
      await browser.close();
    }
  }
})().catch((err) => {
  // Surface launch/shutdown failures instead of leaving an unhandled rejection.
  console.error(err);
  process.exit(1);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"dependencies": { | |
"puppeteer-core": ">=21.0.0" | |
}, | |
"engines": { | |
"node": ">=14.8" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment