Skip to content

Instantly share code, notes, and snippets.

@pthrasher
Created February 3, 2012 16:34
Show Gist options
  • Save pthrasher/1731001 to your computer and use it in GitHub Desktop.
Save pthrasher/1731001 to your computer and use it in GitHub Desktop.
Fetches the list of the top 1000 companies using Node.js, the jsdom module, and CoffeeScript.
# Scrapes the ten result pages of the stormscape "global 1000" list with jsdom
# and prints every table row as "Ranking, Company", sorted by ranking.
jsdom = require 'jsdom'

# Rows collected so far; each row is the array of cell texts of one <tr>.
dataz = []
# Number of rows the ten pages are expected to yield in total.
listings = 1000
http_host = "http://www.stormscape.com"
path = "/inspiration/website-lists/global1000/"

# Build up the list of urls to visit.
urls = for page in [0..9]
  file = "index.php?pageNum_list=#{ page }&totalRows_list=1000&display=names"
  "#{ http_host }#{ path }#{ file }"

# Pages that have not answered yet. Completion is tracked per page rather than
# by row count, so a short or failed page cannot leave the script silent.
pending = urls.length

# Print the collected rows to stdout, ordered by company ranking.
report = ->
  if dataz.length isnt listings
    console.error "Warning: expected #{ listings } rows but collected #{ dataz.length }."
  # Quick little sort on the company ranking (first cell, coerced to a number).
  dataz.sort (a, b) ->
    +a[0] - +b[0]
  # Put the headers back in.
  console.log "Ranking, Company"
  for row in dataz
    console.log "#{ row[0] }, #{ row[1] }"
  return

# Load one listing page, inject qwery into it and append its rows to dataz.
fetchPage = (url) ->
  jsdom.env
    html: url
    scripts: 'https://raw.github.com/ded/qwery/master/qwery.min.js'
    done: (errors, window) ->
      # window (or the injected qwery) is missing when the load failed.
      q = window?.qwery
      if q?
        # One array of cell texts per row. Skip the first tr... it's the
        # table header.
        for tr in (q "table.type1 tr")[1..]
          dataz.push (td.textContent for td in (q 'td', tr))
      else
        console.error "Failed to load #{ url }:", errors
      # Report once every page has answered, successfully or not.
      pending -= 1
      report() if pending is 0
      return

fetchPage url for url in urls
/*
 * Scrapes the ten result pages of the stormscape "global 1000" list with
 * jsdom and prints every table row as "Ranking, Company", sorted by ranking.
 * Kept in ES5 (var / function expressions) to match the rest of this file.
 */
(function() {
  var dataz, fetchPage, file, http_host, jsdom, listings, page, path, pending, report, urls, _i, _len;

  jsdom = require('jsdom');

  // Rows collected so far; each row is the array of cell texts of one <tr>.
  dataz = [];
  // Number of rows the ten pages are expected to yield in total.
  listings = 1000;
  http_host = "http://www.stormscape.com";
  path = "/inspiration/website-lists/global1000/";

  // Build up the list of urls to visit.
  urls = [];
  for (page = 0; page <= 9; page++) {
    file = "index.php?pageNum_list=" + page + "&totalRows_list=1000&display=names";
    urls.push("" + http_host + path + file);
  }

  // Pages that have not answered yet. Completion is tracked per page rather
  // than by row count, so a short or failed page cannot leave the script silent.
  pending = urls.length;

  // Print the collected rows to stdout, ordered by company ranking.
  report = function() {
    var row, _j, _len2;
    if (dataz.length !== listings) {
      console.error("Warning: expected " + listings + " rows but collected " + dataz.length + ".");
    }
    // Sort on the company ranking (first cell, coerced to a number).
    dataz.sort(function(a, b) {
      return +a[0] - +b[0];
    });
    // Put the headers back in.
    console.log("Ranking, Company");
    for (_j = 0, _len2 = dataz.length; _j < _len2; _j++) {
      row = dataz[_j];
      console.log("" + row[0] + ", " + row[1]);
    }
  };

  // Load one listing page, inject qwery into it and append its rows to dataz.
  fetchPage = function(url) {
    jsdom.env({
      html: url,
      scripts: 'https://raw.github.com/ded/qwery/master/qwery.min.js',
      done: function(errors, window) {
        var cells, q, rows, tds, _j, _k, _len2, _len3;
        // window (or the injected qwery) is missing when the load failed.
        q = window != null ? window.qwery : null;
        if (q != null) {
          // Skip the first tr: it is the table header.
          rows = q("table.type1 tr").slice(1);
          for (_j = 0, _len2 = rows.length; _j < _len2; _j++) {
            tds = q('td', rows[_j]);
            cells = [];
            for (_k = 0, _len3 = tds.length; _k < _len3; _k++) {
              cells.push(tds[_k].textContent);
            }
            dataz.push(cells);
          }
        } else {
          console.error("Failed to load " + url + ":", errors);
        }
        // Report once every page has answered, successfully or not.
        pending -= 1;
        if (pending === 0) {
          report();
        }
      }
    });
  };

  for (_i = 0, _len = urls.length; _i < _len; _i++) {
    fetchPage(urls[_i]);
  }
}).call(this);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment