Skip to content

Instantly share code, notes, and snippets.

@max-mapper
Last active August 1, 2018 16:31
Show Gist options
  • Save max-mapper/fd804226f26924794cdb3709d27bc39d to your computer and use it in GitHub Desktop.
Save max-mapper/fd804226f26924794cdb3709d27bc39d to your computer and use it in GitHub Desktop.
data.gov metadata downloader
var request = require('request')
// Command-line entry point.
//
// Usage: node download.js <search-url> [start-offset]
//   <search-url>    URL to page through; it must already contain a query
//                   string because dl() appends `&start=<offset>` to it.
//   [start-offset]  Optional offset of the first record to fetch (default 0).
var link = process.argv[2]
if (!link) {
  // Without this guard the literal string "undefined&start=0" would be requested.
  console.error('Usage: node download.js <search-url> [start-offset]')
  process.exit(1)
}

var start = process.argv[3]
if (!start) start = 0
else start = Number(start)
if (!Number.isInteger(start) || start < 0) {
  // A non-numeric offset used to become NaN and be sent as `&start=NaN`.
  console.error('Invalid start offset: ' + process.argv[3])
  process.exit(1)
}

dl(link, start, function (err) {
  // A download failure is fatal: rethrow so the process exits non-zero.
  if (err) throw err
  console.error('All done')
})
/**
 * Downloads every page of a paginated JSON search endpoint and prints each
 * result record to stdout as one JSON document per line. Progress and retry
 * diagnostics are written to stderr so that stdout contains only records.
 *
 * Each response body is expected to look like `{result: {results: [...]}}`.
 * Paging stops when a page comes back with an empty `results` array.
 *
 * @param {string} link - Search URL. It must already contain a query string,
 *   because `&start=<offset>` is appended to it for every page.
 * @param {number} start - Offset of the first record to request.
 * @param {function(Error=)} cb - Called once: with no argument when an empty
 *   page is received, or with an Error when a single page has failed
 *   RETRY_LIMIT times in a row.
 */
function dl (link, start, cb) {
  var RETRY_LIMIT = 5
  var RETRY_DELAY_MS = 15000

  next()

  // Fetches the page at the current `start` offset. `retries` is the number
  // of attempts left for this page; it is reset for every new page.
  function next (retries) {
    if (typeof retries === 'undefined') retries = RETRY_LIMIT
    var href = link + `&start=${start}`
    request({url: href, json: true}, function (err, resp, body) {
      // On a transport error there is no response object at all, so every
      // access to the status code has to tolerate a missing `resp`.
      var statusCode = resp ? resp.statusCode : undefined
      // A 2xx answer can still carry an unusable body (e.g. an HTML error
      // page); treat a missing results array as a failed attempt.
      var results = body && body.result && body.result.results
      var failed = err || !resp || statusCode > 299 || !Array.isArray(results)

      if (failed) {
        retries--
        if (retries <= 0) {
          // The URL goes into the message itself: extra positional
          // arguments to Error() are not part of the message.
          return cb(new Error('Retry limit exceeded: ' + href))
        }
        setTimeout(function () {
          console.error('Retrying', href, {
            retries: retries,
            statusCode: statusCode,
            error: err ? (err.message || String(err)) : undefined
          })
          next(retries)
        }, RETRY_DELAY_MS)
        return
      }

      console.error(href, statusCode, results.length, new Date())
      if (results.length === 0) {
        return cb()
      }
      // Advance by the number of records actually received, not by the
      // requested page size: the API sometimes returns short pages.
      start += results.length
      results.forEach(function (r) {
        console.log(JSON.stringify(r))
      })
      next()
    })
  }
}
# Example invocations (shell). The script prints one JSON record per line on
# stdout and its progress/retry messages on stderr, so stdout is redirected to
# a timestamped .json file and stderr to a timestamped .log file.
# The logs/ directory must already exist, otherwise the stderr redirect fails.
# Each $(date ...) is evaluated separately, so the two timestamps on one line
# can differ by a second.

# First run: package_search with rows=1000 per request, saved as toplevel-*.json.
node download.js "https://catalog.data.gov/api/3/action/package_search?rows=1000" > toplevel-$(date "+%Y%m%d-%H%M%S").json 2> logs/toplevel-$(date "+%Y%m%d-%H%M%S").log
# Second run: same search restricted with fq=collection_package_id:* and saved
# as children-*.json. NOTE(review): this URL uses /api/action/ while the one
# above uses /api/3/action/ - confirm whether the difference is intentional.
node download.js "https://catalog.data.gov/api/action/package_search?fq=collection_package_id:*&rows=1000" > children-$(date "+%Y%m%d-%H%M%S").json 2> logs/children-$(date "+%Y%m%d-%H%M%S").log
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment