Created
September 8, 2016 14:27
-
-
Save adamhepton/44bb94eedc9365afbf0e85631c75deb8 to your computer and use it in GitHub Desktop.
Scrape the guts out of eBay auctions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party and core modules used by the scraper.
var jsdom = require('jsdom');
var csv = require('json2csv');
var fs = require('fs');

// One record is appended here for each auction whose description has loaded.
var processed = [];

// Everything after `node <script>` on the command line is treated as a URL.
var urls = process.argv.slice(2);

// Record properties handed to json2csv as the `fields` option.
var fields = ['title', 'price', 'image', 'date', 'description'];
// For each URL: load the listing page, read the headline details from it,
// then load the document referenced by the #desc_ifr element's `src` to pick
// up the description text. Each finished record is pushed onto `processed`
// and `checkProgress` is invoked.
urls.forEach(function scrapeAuction(auctionUrl) {
    jsdom.env(auctionUrl, function onListingLoaded(listingError, page) {
        if (listingError) {
            throw (listingError);
        }

        var doc = page.document;
        var endText = doc.querySelector('#bb_tlft').textContent.trim();
        var listing = {};

        // The title text is the node that follows the <span> inside #itemTitle.
        listing.title = doc.querySelector('#itemTitle span').nextSibling.textContent.trim();
        listing.price = doc.querySelector('#prcIsum').textContent.trim();
        listing.image = doc.querySelector('#mainImgHldr #icImg').getAttribute('src');

        if (endText.indexOf('Time left') !== -1) {
            listing.date = 'Ongoing';
        } else {
            // Collapse the first run of whitespace and drop the three word
            // characters that follow the seconds before handing the text to Date.
            listing.date = new Date(endText.replace(/\s\s+/, ' ').replace(/(\:\d\d)\s\w\w\w/, '$1'));
        }

        var descriptionUrl = doc.querySelector('#desc_ifr').getAttribute('src');

        jsdom.env(descriptionUrl, function onDescriptionLoaded(frameError, frame) {
            if (frameError) {
                throw (frameError);
            }

            var frameDoc = frame.document;
            var styleNode = frameDoc.querySelector('#ds_div style');
            var fullText = frameDoc.querySelector('#ds_div').textContent;
            var styleText = "";

            if (styleNode) {
                styleText = styleNode.textContent || "";
            }

            // textContent of #ds_div includes any inline <style> text; strip it out.
            listing.description = fullText.replace(styleText, '').trim();

            processed.push(listing);
            checkProgress();
        });
    });
});
/**
 * Writes the collected records to disk once every URL has been processed.
 *
 * Does nothing until `processed` holds as many records as there are entries
 * in `urls`. When they match, the records are converted with json2csv
 * (using `fields` and a tab as the `del` option) and written to a file named
 * `ebay-YYYYMMDD-HHMMSS.csv`, stamped with the current local date and time.
 * The outcome of the write is reported on the console.
 */
var checkProgress = function() {
    // Some auctions are still outstanding; a later call will do the write.
    if (processed.length !== urls.length) {
        return;
    }

    // Left-pad single-character values with a zero so every part has at least two digits.
    var pad = function(value) {
        var text = String(value);
        return text.length === 1 ? '0' + text : text;
    };

    var today = new Date();
    var datePart = [
        today.getFullYear(),
        // getMonth() is zero-based (January is 0), so add one to get the
        // calendar month in the file name.
        pad(today.getMonth() + 1),
        pad(today.getDate())
    ].join('');
    var timePart = [
        pad(today.getHours()),
        pad(today.getMinutes()),
        pad(today.getSeconds())
    ].join('');
    var filename = ['ebay', datePart, timePart].join('-') + '.csv';

    fs.writeFile(
        filename,
        csv({ data: processed, fields: fields, del: '\t' }),
        function(err) {
            if (err) {
                return console.log(err);
            }
            console.log('Processing finished and saved to', filename);
        }
    );
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment