Created
May 15, 2018 06:32
-
-
Save esmerino/28714654d9568ecec62b2d4e78b8c00f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer') | |
const fs = require('fs') | |
/**
 * Search amazon.com for a term, collect product page URLs from the result
 * pages, visit each product page and write the scraped details to
 * `products.json` in the current working directory.
 *
 * @param {number} productCount - Maximum number of product URLs to collect.
 * @param {string} productSearch - Text typed into the Amazon search box.
 * @returns {Promise<Array<{url: string, title: (string|null), price: string, availability: (string|null)}>>}
 *   The products that were written to `products.json`. Product pages that
 *   show no `#priceblock_ourprice` element are skipped.
 * @throws Any Puppeteer navigation/selector error or file-system error; the
 *   browser is closed before the error propagates.
 */
const amazonScrape = async (productCount, productSearch) => {
  const navigationOptions = { waitUntil: 'networkidle2', timeout: 3000000 };
  const browser = await puppeteer.launch();

  // try/finally so the browser process is released even when a step throws.
  try {
    const page = await browser.newPage();
    await page.goto('https://www.amazon.com', navigationOptions);
    await page.type('#twotabsearchtextbox', `${productSearch}`);
    await page.click('input.nav-input');
    await page.waitForSelector('div#resultsCol');
    await page.waitFor(10000);

    const urls = [];
    const seenUrls = new Set();
    while (urls.length < productCount) {
      // Runs inside the page: only `document` and the callback's own
      // parameters are available there, not variables from this file.
      const pageUrls = await page.evaluate(() =>
        Array.from(document.querySelectorAll("li[id^='result_']"))
          // The product link is the second anchor of a result item; items
          // with fewer anchors yield `undefined` and are dropped below.
          .map((result) => result.querySelectorAll('a')[1])
          .filter((link) => Boolean(link) && link.href.split('/').includes('www.amazon.com'))
          .map((link) => link.href)
      );

      const countBefore = urls.length;
      for (const pageUrl of pageUrls) {
        if (urls.length >= productCount) break;
        if (!seenUrls.has(pageUrl)) {
          seenUrls.add(pageUrl);
          urls.push(pageUrl);
        }
      }

      // Enough URLs collected: do not load another result page.
      if (urls.length >= productCount) break;
      // A page that contributed nothing new means paging further cannot help;
      // stopping here guarantees the loop terminates.
      if (urls.length === countBefore) break;
      // No "next page" element: this was the last result page.
      const nextLink = await page.$('#pagnNextString');
      if (nextLink === null) break;

      await page.click('#pagnNextString');
      await page.waitForSelector('div#resultsCol');
      await page.waitFor(10000);
    }

    const products = [];
    for (const url of urls) {
      await page.goto(url, navigationOptions);
      await page.waitForSelector('#productTitle');
      // `url` must be handed over as an argument: the callback is executed
      // in the page, where this function's local variables do not exist.
      const product = await page.evaluate((productUrl) => {
        const textOf = (selector) => {
          const node = document.querySelector(selector);
          return node ? node.textContent.trim() : null;
        };
        const price = textOf('#priceblock_ourprice');
        if (price === null) return null;
        return {
          url: productUrl,
          title: textOf('#productTitle'),
          price,
          availability: textOf('#availability'),
        };
      }, url);
      // Skip pages without a price instead of storing empty entries.
      if (product) products.push(product);
    }

    fs.writeFileSync('products.json', JSON.stringify(products));
    return products;
  } finally {
    await browser.close();
  }
};
// Start the scrape. amazonScrape returns a promise, so failures are reported
// here and reflected in the exit code rather than left as an unhandled rejection.
amazonScrape(200, "go pro").catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment