Created
November 10, 2016 18:34
-
-
Save rodrigonehring/30e4df6cc545c54d183ba6dfd1e03af5 to your computer and use it in GitHub Desktop.
Scraping kabum, terabyte and pichau product info
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const request = require('request'); | |
const cheerio = require('cheerio'); | |
/**
 * Converts a Brazilian-formatted price string (e.g. "R$ 1.234,56") to a number.
 *
 * The first numeric token found after the first "R$" marker is parsed. When
 * the string has no "R$" at all, the first numeric token of the whole string
 * is used instead. "." is treated as a thousands separator and "," as the
 * decimal separator. Only unsigned amounts are recognised.
 *
 * @param {*} str - Raw price text; non-string values are passed through.
 * @returns {*} 0 for falsy input, the input itself when it is not a string,
 *   NaN when the text holds no digits, otherwise the parsed amount.
 */
function priceToNumber(str) {
  if (!str) {
    return 0;
  }
  if (typeof str !== 'string') {
    return str;
  }

  const marker = 'R$';
  const markerAt = str.indexOf(marker);
  // Skip past the currency marker only when it is actually present; slicing
  // unconditionally would chop the first character off marker-less prices.
  const tail = markerAt === -1 ? str : str.slice(markerAt + marker.length);

  // Digits with optional "." group separators and an optional ",dd" part.
  const match = tail.match(/\d[\d.]*(?:,\d+)?/);
  if (!match) {
    return NaN;
  }

  // Remove every thousands separator (not just the first one), then turn the
  // decimal comma into a dot so parseFloat can read it.
  const normalized = match[0].replace(/\./g, '').replace(',', '.');
  return parseFloat(normalized);
}
/**
 * Downloads a product page and extracts its data with the scraper that
 * matches the store name found in the URL ("kabum", "pichau" or "terabyte").
 *
 * @param {string} url - Address of the product page.
 * @returns {Promise<Object>} Resolves with the scraper's result plus `url`,
 *   with `price` and `price2` converted through `priceToNumber`. For a URL
 *   that matches no known store the result carries `error: true` instead of
 *   scraped fields. Rejects when the request fails or the page cannot be
 *   processed.
 */
const getData = url => {
  return new Promise((resolve, reject) => {
    request(url, (error, response, html) => {
      if (error) {
        // Stop here: without the return the code below would go on to parse
        // an undefined body and throw from inside the callback.
        reject(error);
        return;
      }

      // Anything thrown inside this asynchronous callback would escape the
      // promise, so turn it into a rejection explicitly.
      try {
        const $ = cheerio.load(html);
        let result = {};

        if (url.includes('kabum')) {
          result = kabum($, result);
        } else if (url.includes('pichau')) {
          result = pichau($, result);
        } else if (url.includes('terabyte')) {
          result = terabyte($, result);
        } else {
          result.error = true;
        }

        result.url = url;
        result.price = priceToNumber(result.price);
        result.price2 = priceToNumber(result.price2);
        resolve(result);
      } catch (processingError) {
        reject(processingError);
      }
    });
  });
};
/**
 * Fills `result` with the product data found on a parsed Kabum page.
 *
 * @param {Function} $ - Query function for the loaded page (cheerio-style).
 * @param {Object} result - Object to populate; it is mutated in place.
 * @returns {Object} The same `result`, with `title` (text of `h1.titulo_det`),
 *   `price` (text of `.preco_desconto strong`), `price2` (trimmed text of
 *   `.preco_normal`) and `images` (the `src` of every slide image).
 */
const kabum = ($, result) => {
  result.title = $('h1.titulo_det').text();
  result.price = $('.preco_desconto strong').text();
  result.price2 = $('.preco_normal').text().trim();
  result.images = [];
  // Iterate with .each: the loop exists only for its side effect, so there is
  // no reason to build the extra collection that .map returns.
  $('#imagem-slide li img').each((index, img) => {
    result.images.push($(img).attr('src'));
  });
  return result;
};
/**
 * Fills `result` with the product data found on a parsed Pichau page.
 *
 * @param {Function} $ - Query function for the loaded page (cheerio-style).
 * @param {Object} result - Object to populate; it is mutated in place.
 * @returns {Object} The same `result`, with `title` (trimmed text of
 *   `.product-details h2`), `price` (trimmed text of the first
 *   `.payment .boleto span`), `price2` (trimmed text of
 *   `.other .valorcartao`) and `images` (the `src` of every slide image).
 */
const pichau = ($, result) => {
  result.title = $('.product-details h2').text().trim();
  result.price = $('.payment .boleto span').first().text().trim();
  result.price2 = $('.other .valorcartao').text().trim();
  result.images = [];
  // Iterate with .each: the loop exists only for its side effect, so there is
  // no reason to build the extra collection that .map returns.
  $('.product-images ul.slides li img').each((index, img) => {
    result.images.push($(img).attr('src'));
  });
  return result;
};
/**
 * Parses a price string with `priceToNumber` and multiplies it by `times`.
 *
 * @param {*} str - Price text (or value) handed to `priceToNumber`.
 * @param {number} times - Multiplier applied to the parsed amount.
 * @returns {number} The parsed amount multiplied by `times`.
 */
function multiplyParcels(str, times) {
  return priceToNumber(str) * times;
}
/**
 * Fills `result` with the product data found on a parsed Terabyte page.
 *
 * @param {Function} $ - Query function for the loaded page (cheerio-style).
 * @param {Object} result - Object to populate; it is mutated in place.
 * @returns {Object} The same `result`, with `title` (trimmed text of
 *   `h1.tit-prod`), `price` (trimmed text of the first `p.val-prod`),
 *   `images` (the `src` of every carousel image) and `price2`, which is the
 *   trimmed text of `.val-parc span` run through `multiplyParcels` with a
 *   factor of 12 and is therefore already a number.
 */
const terabyte = ($, result) => {
  result.title = $('h1.tit-prod').text().trim();
  result.price = $('p.val-prod').first().text().trim();
  result.price2 = $('.val-parc span').text().trim();
  result.images = [];
  // Iterate with .each: the loop exists only for its side effect, so there is
  // no reason to build the extra collection that .map returns.
  $('#Carousel img').each((index, img) => {
    result.images.push($(img).attr('src'));
  });
  // The scraped value is multiplied by 12 to produce price2
  // (presumably a per-installment amount — confirm against the page).
  result.price2 = multiplyParcels(result.price2, 12);
  return result;
};
// Product pages to scrape, one for each supported store.
const urls = [
  'http://www.pichau.com.br/placa-de-video-zotac-geforce-gtx-1060-3gb-mini-zt-p10610a-10l-box',
  'http://www.terabyteshop.com.br/produto/6675/placa-de-video-zotac-geforce-gtx-1060-mini-6gb-zt-p10600a-10l-gddr5-pci-exp',
  'http://www.kabum.com.br/produto/80910/placa-de-video-vga-zotac-gtx-1060-3gb-192bits-8008mhz-dvi-hdmi-dp-zt-p10610a-10l/?tag=1060',
];

// Start a scrape for every URL without waiting for the previous one and print
// each result. Failures are reported per URL so that a rejected request does
// not end up as an unhandled promise rejection.
urls.forEach(url => {
  getData(url)
    .then(console.log)
    .catch(error => console.error(`Failed to scrape ${url}:`, error));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment