-
-
Save jancurn/05ed1ce414ca1f8f5a2e5f1eb4b1a2c6 to your computer and use it in GitHub Desktop.
const puppeteer = require('puppeteer');
const proxyChain = require('proxy-chain');

/**
 * Example: run Puppeteer through an upstream proxy that requires credentials.
 *
 * The upstream proxy URL (with username and password) is handed to
 * `proxyChain.anonymizeProxy`, which returns a new proxy URL that is passed
 * to Chromium via `--proxy-server` instead of the original one.
 *
 * The anonymized proxy does not close by itself, so both the browser and the
 * proxy are released in `finally` blocks, even when navigation fails.
 */
(async () => {
  // Placeholder upstream proxy URL with credentials; replace with your own.
  const oldProxyUrl = 'http://bob:password123@proxy.example.com:8000';
  const newProxyUrl = await proxyChain.anonymizeProxy(oldProxyUrl);

  try {
    // Prints something like "http://127.0.0.1:45678"
    console.log(newProxyUrl);

    const browser = await puppeteer.launch({
      args: [`--proxy-server=${newProxyUrl}`],
    });

    try {
      // Do your magic here...
      const page = await browser.newPage();
      await page.goto('https://www.example.com');
      await page.screenshot({ path: 'example.png' });
    } finally {
      // Always close the browser, even if the page work above threw.
      await browser.close();
    }
  } finally {
    // Clean up, forcibly close all pending connections
    await proxyChain.closeAnonymizedProxy(newProxyUrl, true);
  }
})().catch((error) => {
  // Do not leave the promise floating: report the failure and signal it
  // through the process exit code.
  console.error(error);
  process.exitCode = 1;
});
So there is this https://github.com/gajus/puppeteer-proxy
It hooks into Node.js, intercepts the communication, and changes the proxy. It seems like the per-page limitation is a Chrome one.
To intercept HTTPS communication, you'd need to use a man-in-the-middle proxy with a custom self-signed certificate, which is quite painful to set up and insecure. To support proxy IP address rotation in PuppeteerCrawler in Apify SDK, we simply start new Chromium browser instances with new proxy settings. BTW this is implemented out-of-the-box in Apify's Web Scraper (apify/web-scraper).
Hi @jancurn will the proxy server close by itself?
@gsouf Unfortunately not, you need to call closeAnonymizedProxy. I'll update the Gist.
@jancurn I am trying to run puppeteer with proxy chain package on aws lambda but I am getting this error message:
"errorType": "Error",
"errorMessage": "Protocol error (Target.createTarget): Target closed.",
code:
const chromium = require('chrome-aws-lambda');
const { addExtra } = require("puppeteer-extra");
const puppeteerExtra = addExtra(chromium.puppeteer);
const proxyChain = require('proxy-chain');
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteerExtra.use(StealthPlugin());
/**
 * Lambda handler: opens `url` in a headless Chromium routed through an
 * anonymized proxy and reports the extracted elements via `callback`.
 *
 * @param {object} event - Invocation event (not read by this handler).
 * @param {object} context - Invocation context (not read by this handler).
 * @param {Function} callback - Node-style callback; called with `(error)` on
 *   failure or `(null, finalResult)` on success.
 * @returns {Promise<*>} Whatever `callback` returns.
 */
exports.handler = async (event, context, callback) => {
  let finalResult = [];
  const url = ``;
  let browser;
  const oldProxyUrl = ''; // --> bright data proxy
  const newProxyUrl = await proxyChain.anonymizeProxy(oldProxyUrl);
  console.log("newProxyUrl", newProxyUrl)
  try {
    browser = await puppeteerExtra.launch({
      args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${newProxyUrl}`],
      defaultViewport: chromium.defaultViewport,
      executablePath: await chromium.executablePath,
      headless: chromium.headless
    });
    const page = await browser.newPage();
    await page.goto(url);
    finalResult = await extractElements(page);
  } catch (error) {
    return callback(error);
  } finally {
    try {
      // `browser` is still undefined when launch() itself failed; calling
      // close() on it would throw a TypeError and hide the real error.
      if (browser) {
        await browser.close();
      }
    } finally {
      // The anonymized proxy does not shut down on its own; close it and
      // forcibly drop pending connections so it is not leaked.
      await proxyChain.closeAnonymizedProxy(newProxyUrl, true);
    }
  }
  return callback(null, finalResult);
};
Please, would you have any insight into rotating proxies per page?
I am trying to modify the below code: