Last active
April 13, 2020 08:42
-
-
Save leifermendez/cc9df42cb2703a59e592d24f49f817cb to your computer and use it in GitHub Desktop.
scrapper-raiting.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
const xl = require('excel4node'); | |
// Booking.com listing whose review scores are exported to the spreadsheet.
const link = "https://www.booking.com/hotel/es/alterhome-ifema.es.html";

// getReviews is an async function, so a failure surfaces as a rejected promise.
// A synchronous try/catch around the call would never observe it; the handler
// has to be attached to the returned promise instead.
getReviews(link).catch((error) => {
    console.log(error);
});
/**
 * Converts a score scraped from the page into a number.
 *
 * The scraped listing is the Spanish-locale page, where decimals may be written
 * with a comma ("8,5"). parseFloat stops at the comma and would return 8, so the
 * comma is normalised to a dot before parsing.
 *
 * @param {*} rawScore - Score text as scraped (e.g. "8,5" or "8.5").
 * @returns {number} The parsed score, or NaN when the value is not a parsable string.
 */
function parseScore(rawScore) {
    if (typeof rawScore !== 'string') {
        return NaN;
    }
    return Number.parseFloat(rawScore.trim().replace(',', '.'));
}

/**
 * Scrapes the review scores of a listing and writes them to 'Reviews.xlsx'.
 *
 * Sheet layout: row 1 holds the headers ('Apartamento' followed by one column
 * per review category); row 2 holds the listing name followed by the numeric
 * score of each category.
 *
 * @param {string} link - URL of the listing page, forwarded to process().
 * @returns {Promise<void>} Settles once the workbook write has been requested.
 * @throws {Error} When process() yields no usable data (no object or no reviews array).
 */
async function getReviews(link) {
    const info = await process(link);
    console.log(info);

    // process() may resolve with nothing when the page could not be evaluated;
    // fail with a clear message instead of a TypeError on `info.name`.
    if (!info || !Array.isArray(info.reviews)) {
        throw new Error(`No review data could be scraped from ${link}`);
    }

    const wb = new xl.Workbook();
    const ws = wb.addWorksheet('Sheet 1');
    const style = {
        alignment: {
            horizontal: 'center',
        },
    };

    // ws.cell takes (row, column), both 1-based.
    const headerRow = 1;
    const valueRow = 2;
    let column = 1;

    ws.cell(headerRow, column).string('Apartamento').style(style);
    ws.cell(valueRow, column)
        .string(typeof info.name === 'string' ? info.name : '')
        .style(style);
    column++;

    for (const { category, review } of info.reviews) {
        const score = parseScore(review);
        // Entries whose title or score could not be read would put a non-string /
        // NaN into the sheet; skip them without leaving an empty column behind.
        if (typeof category !== 'string' || Number.isNaN(score)) {
            continue;
        }
        ws.cell(headerRow, column).string(category).style(style);
        ws.cell(valueRow, column).number(score).style(style);
        column++;
    }

    wb.write('Reviews.xlsx');
}
// Resource types the scraper never reads; their requests are aborted.
const BLOCKED_RESOURCE_TYPES = ['image', 'stylesheet', 'font'];

/**
 * Collects the listing name and the per-category review scores from the DOM.
 *
 * This function is handed to page.evaluate, so it runs inside the page, not in
 * Node: it must stay self-contained and may only reference browser globals.
 *
 * @returns {{name: (string|undefined), reviews: Array<{category: string, review: string}>}}
 *   `name` is left unset when the name element is not found; `review` is the
 *   raw score text exactly as shown on the page.
 */
function extractHotelData() {
    const data = {};

    const nameElement = document.querySelector('#right #hp_hotel_name');
    if (nameElement) {
        data.name = nameElement.innerText;
    }

    const reviews = [];
    const scoreLists = document.querySelectorAll(
        '#review_list_score > div.v2_review-scores__wrapper > div > ul'
    );
    scoreLists.forEach((list) => {
        list.querySelectorAll('li').forEach((item) => {
            const title = item.querySelector('div > span.c-score-bar__title');
            const score = item.querySelector('div > span.c-score-bar__score');
            // Only keep complete entries; an item missing either part carries no usable score.
            if (title && score) {
                reviews.push({ category: title.innerText, review: score.innerText });
            }
        });
    });
    data.reviews = reviews;

    return data;
}

/**
 * Opens the listing in headless Chromium, clicks the score card link and
 * scrapes the listing name and review scores.
 *
 * The browser is closed on every path, including failures. Any failure
 * (launch, navigation, missing score card link, evaluation) rejects the
 * returned promise instead of leaving it pending.
 *
 * @param {string} link - URL of the listing page.
 * @returns {Promise<{name: (string|undefined), reviews: Array<{category: string, review: string}>}>}
 *   The data produced by extractHotelData.
 */
async function process(link) {
    const browser = await puppeteer.launch({
        headless: true,
    });

    try {
        const page = await browser.newPage();

        await page.setRequestInterception(true);
        page.on('request', (request) => {
            if (BLOCKED_RESOURCE_TYPES.includes(request.resourceType())) {
                request.abort();
            } else {
                request.continue();
            }
        });

        try {
            // timeout: 0 disables the navigation timeout.
            await page.goto(link, {
                timeout: 0,
            });
        } catch (err) {
            console.log('error happen at the page: ', err);
            // Nothing can be scraped from a page that failed to load.
            throw err;
        }

        // NOTE(review): nothing waits for the review panel to render after this
        // click; if it is populated asynchronously the score list may still be
        // empty when it is read below - confirm against the live page.
        await page.click('#js--hp-gallery-scorecard > a');

        try {
            return await page.evaluate(extractHotelData);
        } catch (err) {
            console.log('Error evaluando la pagina: ', err);
            throw err;
        }
    } finally {
        // Closing the browser also disposes of the page opened above.
        await browser.close();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment