Created
October 18, 2024 18:23
-
-
Save mauvieira/5ff34c678050077b1467fbc6af2045ab to your computer and use it in GitHub Desktop.
ney crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Extracts the visible fields of every review element on the page.
 *
 * This function is handed to `page.$$eval`, so it is serialized and executed
 * inside the browser page, not in Node. It must therefore stay fully
 * self-contained: no references to variables or helpers declared outside it.
 *
 * @param {Element[]} reviewNodes - Elements matched by the review selector.
 * @returns {Array<{name: string, starRating: string, reviewDate: string, reviewText: string}>}
 *   One plain object per review; a placeholder string is used for any field
 *   whose element is missing or has empty text.
 */
function extractReviews(reviewNodes) {
  // Missing element (undefined) and empty text ('') both fall back on purpose,
  // hence `||` rather than `??`.
  const textOf = (node, selector, fallback) =>
    node.querySelector(selector)?.innerText || fallback;

  return reviewNodes.map((node) => ({
    name: textOf(node, 'span.a-profile-name', 'No name'),
    starRating: textOf(node, 'i[data-hook="review-star-rating"] span', 'No rating'),
    reviewDate: textOf(node, 'span[data-hook="review-date"]', 'No date'),
    reviewText: textOf(node, 'span[data-hook="review-body"] span', 'No review text'),
  }));
}

/**
 * Renders one page of scraped reviews as the plain-text section that is
 * appended to the output file.
 *
 * @param {number} pageNumber - 1-based review page number shown in the header.
 * @param {Array<{name: string, starRating: string, reviewDate: string, reviewText: string}>} reviews
 *   Reviews in page order.
 * @returns {string} Header line followed by one delimited entry per review.
 */
function formatReviews(pageNumber, reviews) {
  const entries = reviews.map((review, index) =>
    [
      `Review ${index + 1}:`,
      `Name: ${review.name}`,
      `Star Rating: ${review.starRating}`,
      `Review Date: ${review.reviewDate}`,
      `Review Text: ${review.reviewText}`,
      '-------------------------------',
      '', // yields the trailing newline after the separator
    ].join('\n'),
  );
  return `--- Page ${pageNumber} ---\n${entries.join('')}`;
}

// Crawls the first `totalPages` review pages of the product and writes them,
// page by page, to `fileName` (truncated at the start of each run).
(async () => {
  const totalPages = 3;
  const fileName = 'reviews.txt';

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    fs.writeFileSync(fileName, '', 'utf-8');

    // Pages are visited sequentially because they share a single tab.
    for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
      const url = `https://www.amazon.com.br/product-reviews/B09329F4J6/ref=cm_cr_getr_d_paging_btm_prev_${pageNumber}?pageNumber=${pageNumber}`;
      await page.goto(url);
      // Rejects on timeout when no review body appears on the page.
      await page.waitForSelector('span[data-hook="review-body"]');

      const reviews = await page.$$eval('div[data-hook="review"]', extractReviews);
      fs.appendFileSync(fileName, formatReviews(pageNumber, reviews), 'utf-8');
    }
  } finally {
    // Always shut the browser down, even when navigation or scraping throws;
    // otherwise the headless browser process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving a floating rejected promise.
  console.error('Review crawl failed:', err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment