Skip to content

Instantly share code, notes, and snippets.

@leifermendez
Last active April 13, 2020 08:42
Show Gist options
  • Save leifermendez/cc9df42cb2703a59e592d24f49f817cb to your computer and use it in GitHub Desktop.
Save leifermendez/cc9df42cb2703a59e592d24f49f817cb to your computer and use it in GitHub Desktop.
scrapper-raiting.js
const puppeteer = require('puppeteer');
const xl = require('excel4node');
// Booking.com hotel page whose rating breakdown is exported.
const link = "https://www.booking.com/hotel/es/alterhome-ifema.es.html";

// getReviews is an async function: a failure surfaces as a rejected promise,
// which a synchronous try/catch around the call can never intercept. Attach
// the handler to the returned promise instead.
getReviews(link).catch((error) => {
    console.log(error);
});
/**
 * Scrapes the hotel at `link` (via the file-local `process` function) and
 * writes its name and per-category scores to `Reviews.xlsx`.
 *
 * Sheet layout: row 1 holds the headers ('Apartamento' followed by one column
 * per review category), row 2 holds the hotel name followed by the numeric
 * score of each category.
 *
 * @param {string} link - URL of the hotel page to scrape.
 * @returns {Promise<void>} Settles once the workbook write has been started.
 * @throws {Error} When the scraper returned no usable data.
 */
async function getReviews(link) {
    const info = await process(link);
    console.log(info);

    // The scraper resolves with `undefined` when evaluating the page fails;
    // fail with a clear message instead of a TypeError on `info.name`.
    if (!info || !Array.isArray(info.reviews)) {
        throw new Error(`No review data could be scraped from ${link}`);
    }

    const wb = new xl.Workbook();
    const ws = wb.addWorksheet('Sheet 1');
    const style = {
        alignment: {
            horizontal: 'center',
        }
    };

    // ws.cell takes (row, column), both 1-based.
    const headerRow = 1;
    const valueRow = 2;
    let col = 1;

    ws.cell(headerRow, col).string('Apartamento').style(style);
    ws.cell(valueRow, col).string(typeof info.name === 'string' ? info.name : '').style(style);
    col++;

    for (const entry of info.reviews) {
        if (!entry || typeof entry.category !== 'string') {
            continue;
        }
        // The scraped page is localized and may use a decimal comma ("8,5");
        // parseFloat stops at the comma and would yield 8, so normalize first.
        const score = parseFloat(String(entry.review).replace(',', '.'));
        if (Number.isNaN(score)) {
            // Incomplete entry (category without a parsable score): skip it
            // rather than handing NaN to the number cell.
            continue;
        }
        ws.cell(headerRow, col).string(entry.category).style(style);
        ws.cell(valueRow, col).number(score).style(style);
        col++;
    }

    wb.write('Reviews.xlsx');
}
/**
 * Opens `link` in a headless browser, clicks the score card link and scrapes
 * the hotel name and the per-category review scores from the page.
 *
 * Note: inside this file the name `process` shadows Node's global `process`
 * object, so the global is not reachable by that name here.
 *
 * @param {string} link - URL of the hotel page.
 * @returns {Promise<{name?: string, reviews: Array<{category: string, review: string}>}|undefined>}
 *   The scraped data, or `undefined` when evaluating the page failed (the
 *   error is logged). `review` is the raw score text as shown on the page.
 * @throws Rejects when the browser cannot be launched, navigation fails, or
 *   the score card link cannot be clicked.
 */
async function process(link) {
    const browser = await puppeteer.launch({
        headless: true
    });
    // try/finally guarantees the browser is shut down on every path, so a
    // failure no longer leaves a browser running or the promise pending.
    try {
        const page = await browser.newPage();

        // Skip images, stylesheets and fonts: only the DOM text is needed.
        await page.setRequestInterception(true);
        page.on('request', request => {
            const type = request.resourceType();
            if (type === 'image' || type === 'stylesheet' || type === 'font') {
                request.abort();
            } else {
                request.continue();
            }
        });

        try {
            // timeout: 0 disables the navigation timeout.
            await page.goto(link, {
                timeout: 0
            });
        } catch (err) {
            console.log('error happen at the page: ', err);
            // Nothing useful can be scraped after a failed navigation.
            throw err;
        }

        // NOTE(review): the score list may be rendered asynchronously after
        // this click; no explicit wait is performed here — confirm whether a
        // waitForSelector is needed.
        await page.click('#js--hp-gallery-scorecard > a');

        try {
            // This callback runs inside the page, not in Node.
            return await page.evaluate(() => {
                const data = {};

                document.querySelectorAll('#right').forEach((container) => {
                    const nameElement = container.querySelector('#hp_hotel_name');
                    if (nameElement) {
                        data.name = nameElement.innerText;
                    }
                });

                const reviews = [];
                const lists = document.querySelectorAll('#review_list_score > div.v2_review-scores__wrapper > div > ul');
                lists.forEach((list) => {
                    list.querySelectorAll('li').forEach((item) => {
                        const title = item.querySelector('div > span.c-score-bar__title');
                        const score = item.querySelector('div > span.c-score-bar__score');
                        // Only keep complete entries; items lacking either
                        // span carry no usable score.
                        if (title && score) {
                            reviews.push({
                                category: title.innerText,
                                review: score.innerText
                            });
                        }
                    });
                });

                data.reviews = reviews;
                return data;
            });
        } catch (err) {
            // Best effort: report the failure and resolve with undefined.
            console.log('Error evaluando la pagina: ', err);
            return undefined;
        }
    } finally {
        // Closing the browser also closes every page it owns.
        await browser.close();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment