Created
February 3, 2012 16:34
-
-
Save pthrasher/1731001 to your computer and use it in GitHub Desktop.
Fetches the list of the top 1000 companies using Node.js, the jsdom module, and CoffeeScript.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrapes the paginated company table on stormscape.com with jsdom and prints
# every collected row, sorted by ranking, as "Ranking, Company" lines.
jsdom = require 'jsdom'

dataz = []
listings = 1000
http_host = "http://www.stormscape.com"
path = "/inspiration/website-lists/global1000/"

# Build up the list of URLs to visit (result pages 0 through 9).
urls = for page in [0..9]
  file = "index.php?pageNum_list=#{ page }&totalRows_list=1000&display=names"
  "#{ http_host }#{ path }#{ file }"

# Number of requests that have not called back yet. Completion is tracked per
# request instead of by row count, so a failed or short page cannot leave the
# script waiting forever without printing anything.
pending = urls.length

# Sort the collected rows by ranking and print them, header first.
report = ->
  if dataz.length isnt listings
    console.error "Expected #{ listings } listings but collected #{ dataz.length }."
  # Quick little sort on the company ranking (unary + coerces the cell text).
  dataz.sort (a, b) ->
    +a[0] - +b[0]
  # Put the headers back in.
  console.log "Ranking, Company"
  for row in dataz
    console.log "#{ row[0] }, #{ row[1] }"
  return

# Load one result page, append its table rows to `dataz`, and trigger the
# report once the last outstanding request has finished. Taking `url` as a
# parameter keeps the right URL available to the callback for error messages.
fetchPage = (url) ->
  jsdom.env
    html: url
    scripts: 'https://raw.github.com/ded/qwery/master/qwery.min.js'
    done: (errors, window) ->
      if errors? and errors.length isnt 0
        console.error "Errors while loading #{ url }:", errors
      # qwery is only present when both the page and the script loaded.
      q = window?.qwery
      if q?
        # One array of cell texts per row; skip the first tr, it's the
        # table header.
        for tr in (q "table.type1 tr")[1..]
          dataz.push (td.textContent for td in (q 'td', tr))
      else
        console.error "Skipping #{ url }: the page or qwery failed to load."
      pending -= 1
      report() if pending is 0
      return

fetchPage url for url in urls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scrapes the paginated company table on stormscape.com with jsdom and prints
 * every collected row, sorted by ranking, as "Ranking, Company" lines.
 *
 * Completion is tracked per request rather than by row count, so a failed or
 * short page is reported on stderr instead of silently suppressing all output.
 */
(function() {
  var jsdom = require('jsdom');
  var dataz = [];
  var listings = 1000;
  var http_host = "http://www.stormscape.com";
  var path = "/inspiration/website-lists/global1000/";
  var urls = [];
  var file, page, pending, i;

  // Build up the list of URLs to visit (result pages 0 through 9).
  for (page = 0; page <= 9; page++) {
    file = "index.php?pageNum_list=" + page + "&totalRows_list=1000&display=names";
    urls.push("" + http_host + path + file);
  }

  // Number of requests that have not called back yet.
  pending = urls.length;

  // Sort the collected rows by ranking and print them, header first.
  function report() {
    var k;
    if (dataz.length !== listings) {
      console.error("Expected " + listings + " listings but collected " + dataz.length + ".");
    }
    // Unary + coerces the ranking cell text to a number for comparison.
    dataz.sort(function(a, b) {
      return +a[0] - +b[0];
    });
    console.log("Ranking, Company");
    for (k = 0; k < dataz.length; k++) {
      console.log("" + dataz[k][0] + ", " + dataz[k][1]);
    }
  }

  // Collect the text content of every td inside the given table row.
  function cellTexts(q, tr) {
    var cells = q('td', tr);
    var texts = [];
    var c;
    for (c = 0; c < cells.length; c++) {
      texts.push(cells[c].textContent);
    }
    return texts;
  }

  // Load one result page, append its table rows to `dataz`, and trigger the
  // report once the last outstanding request has finished. Taking `url` as a
  // parameter keeps the right URL available to the callback for messages.
  function fetchPage(url) {
    jsdom.env({
      html: url,
      scripts: 'https://raw.github.com/ded/qwery/master/qwery.min.js',
      done: function(errors, window) {
        var q, rows, j;
        if (errors && errors.length !== 0) {
          console.error("Errors while loading " + url + ":", errors);
        }
        // qwery is only present when both the page and the script loaded.
        q = window && window.qwery;
        if (q) {
          // Skip the first tr: it is the table header.
          rows = q("table.type1 tr").slice(1);
          for (j = 0; j < rows.length; j++) {
            dataz.push(cellTexts(q, rows[j]));
          }
        } else {
          console.error("Skipping " + url + ": the page or qwery failed to load.");
        }
        pending -= 1;
        if (pending === 0) {
          report();
        }
      }
    });
  }

  for (i = 0; i < urls.length; i++) {
    fetchPage(urls[i]);
  }
}).call(this);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment