Skip to content

Instantly share code, notes, and snippets.

@tylor
Created March 2, 2014 05:11
Show Gist options
  • Save tylor/9302217 to your computer and use it in GitHub Desktop.
Save tylor/9302217 to your computer and use it in GitHub Desktop.
Scrape the CrossFit Open 2014 Results and create a CSV
{
"name": "xf-games-open-14-scrape",
"version": "0.0.0",
"license": "MIT",
"dependencies": {
"request": "~2.34.0",
"cheerio": "~0.13.1",
"async": "~0.2.10"
}
}
request = require 'request'
$ = require 'cheerio'
async = require 'async'
# Scrape the CrossFit Games Open leaderboard (year=14, division=1) and print
# one "ranking, name, score" line per athlete row to stdout. Diagnostics go to
# stderr so that they never end up inside the CSV stream.
#
# NOTE(review): fields are joined with ', ' and are not quoted or escaped, so a
# name containing a comma produces an extra column. This is kept unchanged so
# the output format stays the same as before.

# Matches the parenthesised integer inside a leaderboard cell, e.g. "(12)".
PAREN_NUMBER = /\((\d+)\)/

# Number of leaderboard pages fetched concurrently.
MAX_PARALLEL_REQUESTS = 50

# Build the leaderboard URL for the given 1-based page number.
pageUrl = (page) ->
  "http://games.crossfit.com/scores/leaderboard.php?stage=1&sort=0&page=#{page}&division=1&region=0&numberperpage=100&competition=0&frontpage=0&expanded=0&year=14&full=1&showtoggles=0&hidedropdowns=0&showathleteac=1&=&is_mobile=0"

# Return the digits of the first parenthesised integer in `text`, or null when
# the text contains none.
parenNumber = (text) ->
  found = text.match PAREN_NUMBER
  if found then found[1] else null

# GET `url` and call `callback(err, body)`. Transport failures and non-200
# responses are both reported as errors instead of being parsed as a page.
fetchPage = (url, callback) ->
  request url, (err, response, body) ->
    return callback err if err
    unless response.statusCode is 200
      return callback new Error "unexpected HTTP status #{response.statusCode} for #{url}"
    callback null, body

# Fetch leaderboard page `index`, print its athlete rows, then call `done`.
# `done` receives the error when the page could not be fetched, which makes
# async.eachLimit stop and report it.
requestPage = (index, done) ->
  fetchPage pageUrl(index), (err, body) ->
    return done err if err
    $(body).find('tr:not(#lbhead)').each (id, item) ->
      $item = $(item)
      ranking = parenNumber $item.find('.number').text()
      # Rows without a parenthesised rank are skipped rather than crashing the
      # whole run on a null match.
      return unless ranking?
      name = $item.find('.name').text()
      # A row with no parenthesised score is still printed, with an empty
      # score column.
      score = parenNumber($item.find('.score-cell').first().text()) ? ''
      console.log [ranking, name, score].join(', ')
    done()

# The CSV header is printed before any network activity.
console.log ['ranking', 'name', 'score'].join(', ')

# The first request is only used to discover how many pages exist.
fetchPage pageUrl(1), (err, body) ->
  return console.error 'error', err if err
  # Assumes the first pager link carries the last page number - TODO confirm
  # against the live markup.
  lastPage = parseInt $(body).find('#leaderboard-pager a').first().text(), 10
  # [1..lastPage] is empty for NaN and counts downward for values below 1, so
  # reject both instead of silently scraping nothing or requesting page 0.
  if isNaN(lastPage) or lastPage < 1
    return console.error 'error', new Error 'could not determine the last leaderboard page'
  async.eachLimit [1..lastPage], MAX_PARALLEL_REQUESTS, requestPage, (err) ->
    console.error 'error', err if err
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment