Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save marcuswestin/3006984 to your computer and use it in GitHub Desktop.
Save marcuswestin/3006984 to your computer and use it in GitHub Desktop.
Stay up to date on funding rounds
// sudo npm install request && sudo npm install jsdom
var request = require('request')
var jsdom = require('jsdom')
// Page numbers to scrape are taken from the command-line arguments; default to page 0.
var pages = process.argv.slice(2)
if (!pages.length) { pages = [0] }
console.error('Scrape Crunchbase funding rounds. Pages:', pages)

// Rounds scraped from each page, stored at the same index as the page in `pages`
// so the order does not depend on which request happens to finish first.
var pagesData = []
// Number of pages handled so far (scraped or failed), and how many of those failed.
var pagesFinished = 0
var pagesFailed = 0

// Record the outcome for one page and exit once every requested page is accounted for.
//   index  - position of the page in `pages`
//   rounds - array of scraped rounds, or null when the page could not be scraped
function finishPage(index, rounds) {
  pagesData[index] = rounds || []
  if (!rounds) { pagesFailed += 1 }
  pagesFinished += 1
  if (pagesFinished === pages.length) {
    console.error("Done!")
    // A non-zero exit status signals that at least one page was not scraped.
    process.exit(pagesFailed ? 1 : 0)
  }
}

// Extract the funding rounds from a loaded listing page and print each one to stdout.
//   $ - the jQuery function of the loaded window
// Returns an array of { company, round, amount, investors } objects.
function scrapeRounds($) {
  var t = function(el) { return $.trim($(el).text()) }
  var data = []
  $('#col2_internal tr').each(function(i) {
    if (i === 0) { return } // header
    var td = $(this).find('td')
    var round = {
      company: t($(td[1]).find('a')),
      round: t(td[2]),
      amount: t(td[3]),
      investors: $.map($(td[4]).find('a'), function(a) { return t(a) })
    }
    data.push(round)
    console.log(round.company, round.amount, round.round, '('+round.investors.join(', ')+')')
  })
  return data
}

// Start loading every requested page; each one reports back through finishPage.
pages.forEach(function(page, index) {
  console.error("Scrape page:", page)
  jsdom.env({
    // Encode the argument so it cannot inject extra query parameters into the URL.
    html: 'http://www.crunchbase.com/funding-rounds?page='+encodeURIComponent(page)+'&q=all',
    scripts: 'http://code.jquery.com/jquery-1.7.2.min.js',
    done: function(errors, win) {
      if (errors && errors.length !== 0) {
        console.error("Errors loading page", page+':', errors)
      }
      // Without a window and its jQuery there is nothing to scrape; count the page
      // as failed instead of throwing on `win.$` and never reaching "Done!".
      if (!win || !win.$) {
        console.error("Skipping page", page+':', "could not be loaded")
        return finishPage(index, null)
      }
      console.log("\n\nPage", page+':')
      finishPage(index, scrapeRounds(win.$))
    }
  })
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment