Skip to content

Instantly share code, notes, and snippets.

@mauvieira
Created October 18, 2024 18:23
Show Gist options
  • Save mauvieira/5ff34c678050077b1467fbc6af2045ab to your computer and use it in GitHub Desktop.
Save mauvieira/5ff34c678050077b1467fbc6af2045ab to your computer and use it in GitHub Desktop.
ney crawler
const puppeteer = require('puppeteer');
const fs = require('fs');
// Crawls the first `totalPages` pages of Amazon.com.br reviews for product
// B09329F4J6 with headless Puppeteer and writes a plain-text dump of every
// review (name, star rating, date, text) to `reviews.txt`.
//
// The output file is truncated at the start of each run and then appended to
// one page at a time, so a failure part-way through leaves the pages scraped
// so far on disk.
(async () => {
  const totalPages = 3;
  const fileName = 'reviews.txt';

  // Renders the scraped reviews of one page into the text block appended to
  // the output file.
  const formatPage = (pageNumber, reviews) => {
    let log = `--- Page ${pageNumber} ---\n`;
    reviews.forEach((review, index) => {
      log += `Review ${index + 1}:\n`;
      log += `Name: ${review.name}\n`;
      log += `Star Rating: ${review.starRating}\n`;
      log += `Review Date: ${review.reviewDate}\n`;
      log += `Review Text: ${review.reviewText}\n`;
      log += `-------------------------------\n`;
    });
    return log;
  };

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();

    // Start from an empty file; each page's block is appended below.
    fs.writeFileSync(fileName, '', 'utf-8');

    for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
      const url = `https://www.amazon.com.br/product-reviews/B09329F4J6/ref=cm_cr_getr_d_paging_btm_prev_${pageNumber}?pageNumber=${pageNumber}`;
      await page.goto(url);

      // Wait until at least one review body is present before scraping.
      // This rejects (and aborts the crawl) if none appears in time.
      await page.waitForSelector('span[data-hook="review-body"]');

      // Runs inside the page: extract the fields of every review card,
      // substituting a placeholder when an element is missing or empty.
      const reviews = await page.$$eval('div[data-hook="review"]', (reviewNodes) => {
        return reviewNodes.map((review) => {
          const name = review.querySelector('span.a-profile-name')?.innerText || 'No name';
          const starRating = review.querySelector('i[data-hook="review-star-rating"] span')?.innerText || 'No rating';
          const reviewDate = review.querySelector('span[data-hook="review-date"]')?.innerText || 'No date';
          const reviewText = review.querySelector('span[data-hook="review-body"] span')?.innerText || 'No review text';
          return {
            name,
            starRating,
            reviewDate,
            reviewText,
          };
        });
      });

      fs.appendFileSync(fileName, formatPage(pageNumber, reviews), 'utf-8');
    }
  } finally {
    // Always shut the browser down, even when navigation or scraping throws,
    // so no headless Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled promise rejection.
  console.error('Review crawl failed:', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment