Skip to content

Instantly share code, notes, and snippets.

@esmerino
Created May 15, 2018 06:32
Show Gist options
  • Save esmerino/28714654d9568ecec62b2d4e78b8c00f to your computer and use it in GitHub Desktop.
Save esmerino/28714654d9568ecec62b2d4e78b8c00f to your computer and use it in GitHub Desktop.
const puppeteer = require('puppeteer')
const fs = require('fs')
/**
 * Searches amazon.com for a term, collects product links from the result
 * pages, visits each product page, and writes the scraped details to
 * `products.json` in the current working directory.
 *
 * Each entry in the output has the shape `{ url, title, price, availability }`.
 * Products without a `#priceblock_ourprice` element are skipped.
 *
 * @param {number} productCount - Maximum number of product URLs to collect.
 * @param {string} productSearch - Text typed into the Amazon search box.
 * @returns {Promise<void>} Resolves once the file is written and the browser is closed.
 * @throws Rejects with whatever error Puppeteer or `fs` raises; the browser is
 *   closed before the rejection propagates.
 */
const amazonScrape = async (productCount, productSearch) => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.amazon.com', { waitUntil: 'networkidle2', timeout: 3000000 });
    await page.type('#twotabsearchtextbox', `${productSearch}`);
    await page.click('input.nav-input');
    await page.waitForSelector('div#resultsCol');
    // Fixed pause after the results container appears, before reading the list.
    await page.waitFor(10000);

    let urls = [];
    while (urls.length < productCount) {
      // This callback runs inside the page, so it must not capture Node-side variables.
      const pageUrls = await page.evaluate(() =>
        Array.from(document.querySelectorAll("li[id^='result_']"))
          // The second anchor of a result item is used as the product link.
          .map((result) => result.querySelectorAll('a')[1])
          // Ignore items without a second anchor and links that leave www.amazon.com.
          .filter((link) => Boolean(link) && link.href.split('/').includes('www.amazon.com'))
          .map((link) => link.href));
      urls = [...urls, ...pageUrls].slice(0, productCount);

      // Do not pay for another navigation once enough links are collected.
      if (urls.length >= productCount) {
        break;
      }

      // On the last results page there is no "next" link; clicking it would throw.
      const nextLink = await page.$('#pagnNextString');
      if (nextLink === null) {
        break;
      }
      await page.click('#pagnNextString');
      await page.waitForSelector('div#resultsCol');
      await page.waitFor(10000);
    }

    const products = [];
    for (const url of urls) {
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 3000000 });
      await page.waitForSelector('#productTitle');
      // `url` is handed over as an evaluate argument because the callback executes
      // in the browser context, where Node-side locals are not visible.
      const product = await page.evaluate((productUrl) => {
        const textOf = (selector) => {
          const element = document.querySelector(selector);
          return element ? element.textContent.trim() : null;
        };
        const price = textOf('#priceblock_ourprice');
        if (price === null) {
          return null;
        }
        return {
          url: productUrl,
          title: textOf('#productTitle'),
          price,
          availability: textOf('#availability'),
        };
      }, url);
      // Skip products without a price instead of writing null entries to the file.
      if (product) {
        products.push(product);
      }
    }

    fs.writeFileSync('products.json', JSON.stringify(products));
  } finally {
    // Always release the browser process, even when a step above failed.
    await browser.close();
  }
};
// Script entry point: scrape up to 200 results for "go pro".
// The promise is handled explicitly so a failure is reported and reflected in
// the exit code instead of surfacing as an unhandled rejection.
amazonScrape(200, "go pro").catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment