Skip to content

Instantly share code, notes, and snippets.

@Hais
Created November 24, 2015 08:59
Show Gist options
  • Save Hais/39f8edde07bba5b5787c to your computer and use it in GitHub Desktop.
Save Hais/39f8edde07bba5b5787c to your computer and use it in GitHub Desktop.
camelscraper
csv = require 'csv'
request = require 'superagent'
require('superagent-retry')(request);
cheerio = require 'cheerio'
fs = require 'fs'
async = require 'async'
require('debug-http')()
# Input CSV: the first row is treated as the header, and column index 3 of
# every following row is used as the search query.
INFILE = "deals2.csv"
OUTFILE = "OUTPUT-#{INFILE}"
SEARCH_URL = 'http://uk.camelcamelcamel.com/search'
# Header names appended for the scraped cells (one per scraped table row).
EXTRA_HEADERS = ["Current", "Highest", "Lowest", "Average"]
# Maximum number of lookups in flight at once.
CONCURRENCY = 10

# Report a fatal error and make the process exit with a non-zero status.
fail = (stage, err) ->
  console.error "#{stage} failed:", err
  process.exitCode = 1

# Build an async-style task `(done) ->` that searches for row[3] and appends
# the scraped cells to `row` in place.
#
# A failed lookup is deliberately non-fatal: the row is passed through without
# extra columns so one bad request does not abort the whole batch. The failure
# is logged instead of being silently dropped.
makeLookup = (row) ->
  (done) ->
    console.log row[3]
    request
      .get(SEARCH_URL)
      .query(sq: row[3])
      .retry(5)
      .end (err, res) ->
        if err
          console.error "Lookup failed for #{row[3]}:", err.message ? err
          return done null, row
        $ = cheerio.load res.text
        # Take the text of the second <td> of each row of the table inside the
        # first ".yui3-u-1-2" element (presumably the price summary table --
        # verify against the live page markup).
        cells = $(".yui3-u-1-2").first().find("table tbody tr").map((i, el) ->
          $(el).find("td").eq(1).text()
        ).get()
        row.push cells...
        done null, row

csv.parse fs.readFileSync(INFILE, "UTF-8"), (err, data) ->
  return fail "Parsing #{INFILE}", err if err
  # Without a header row there is nothing to extend; avoid crashing on data[0].
  return fail "Parsing #{INFILE}", new Error("input contains no rows") unless data?.length

  jobs = (makeLookup row for row in data[1..])

  async.parallelLimit jobs, CONCURRENCY, (err, result) ->
    return fail "Scraping", err if err
    result.unshift data[0].concat EXTRA_HEADERS
    csv.stringify result, (err, output) ->
      return fail "Serializing CSV", err if err
      fs.writeFile OUTFILE, output, (err) ->
        return fail "Writing #{OUTFILE}", err if err
        console.log "Written"
{
"dependencies": {
"async": "^1.5.0",
"cheerio": "^0.19.0",
"coffee": "^2.1.0",
"csv": "^0.4.6",
"debug-http": "^0.4.3",
"superagent": "^1.4.0",
"superagent-retry": "^0.5.1"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment