Last active
July 21, 2017 18:08
-
-
Save connormanning/6afc89825c87d6263280ee456212084e to your computer and use it in GitHub Desktop.
Greyhound density sampler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var Promise = require('bluebird'); | |
var request = require('request-promise'); | |
var argv = require('minimist')(process.argv.slice(2)); | |
// Print usage and exit when help is requested (-h) or when the required
// resource (-r) is not a non-empty string. minimist yields `true` for a bare
// `-r` and a Number for numeric-looking values; either would make the later
// `base.slice(...)` call throw, so both are treated as a missing resource.
if (typeof argv.r !== 'string' || !argv.r || argv.h) {
    console.log('\nGREYHOUND DENSITY SAMPLER');
    console.log('--------');
    console.log(
        'This script queries randomly selected areas of a resource to\n' +
        'estimate the density of a dataset. Only queries containing\n' +
        'points in all 4 corners of the query bounds (calculated using\n' +
        'the outer 10% of the query width) are considered in this\n' +
        'analysis');

    console.log('\n\nOptions\n--------');
    console.log('-r <resource>');
    console.log('\tURL base of the resource to sample.');
    console.log('\tFor example: data.greyhound.io/resource/dublin\n');
    console.log('-d <query width>');
    console.log('\tWidth of each query: `-d 10` will query 10x10 unit squares');
    console.log('\n\tDefault: 10\n');
    console.log('-n <number of queries>');
    console.log('\tTotal number of random query samples');
    console.log('\n\tDefault: 100\n');
    console.log('-t <threads>');
    console.log('\tNumber of concurrent queries to run');
    console.log('\n\tDefault: 8\n');

    console.log('\n\nDependencies\n--------');
    console.log('$ npm install bluebird request-promise minimist\n\n');

    // Nothing else can run without a resource, so stop here.
    process.exit();
}
// Normalize the resource URL: default to plain HTTP when no scheme was given
// and make sure it ends with a slash so endpoint names can be appended.
var base = argv.r;
if (!base.startsWith('http')) base = 'http://' + base;
if (!base.endsWith('/')) base = base + '/';

// Read a numeric command-line option.
//   value    - raw value from minimist (may be undefined, number, string, true)
//   fallback - default used when the option is absent or falsy
//   flag     - option name, used only in the error message
//   integer  - when true, fractional values are rejected
// Exits with status 1 on anything that is not a positive finite number, since
// such values would otherwise turn into NaN bounds or string concatenation
// further down.
var numericOption = (value, fallback, flag, integer) => {
    if (!value) return fallback;
    var n = typeof value === 'boolean' ? NaN : Number(value);
    var valid = Number.isFinite(n) && n > 0 &&
        (!integer || Number.isInteger(n));
    if (!valid) {
        console.error('Invalid value for ' + flag + ': ' + value);
        process.exit(1);
    }
    return n;
};

var dist = numericOption(argv.d, 10, '-d', false);      // Query square width.
var num = numericOption(argv.n, 100, '-n', true);       // Number of samples.
var threads = numericOption(argv.t, 8, '-t', true);     // Concurrent queries.

// Layout requested for each point in a read response: three 8-byte doubles.
var schema = [
    { name: 'X', type: 'floating', size: 8 },
    { name: 'Y', type: 'floating', size: 8 },
    { name: 'Z', type: 'floating', size: 8 }
];

// Bytes per point, derived from the schema so the two cannot drift apart.
var pointSize = schema.reduce((total, dim) => total + dim.size, 0);

var responses = 0;
var results = [];
/**
 * Population standard deviation of a list of numbers.
 *
 * @param {number[]} a - Sample values.
 * @returns {number} Square root of the mean squared deviation from the mean.
 *     Returns 0 for an empty array instead of the NaN that 0 / 0 would give.
 */
var stddev = (a) => {
    if (a.length === 0) return 0;

    var mean = a.reduce((p, c) => p + c, 0) / a.length;
    var variance =
        a.reduce((p, c) => p + Math.pow(c - mean, 2), 0) / a.length;
    return Math.sqrt(variance);
};
// Decide whether a binary read response contains at least one point in each
// of the four corners of the query bounds `q` ([minx, miny, maxx, maxy]).
// A corner is the region within dist / 10 of both adjoining edges.
//   buf    - response Buffer; points are packed back to back, pointSize bytes
//            each, with X at byte 0 and Y at byte 8 of every point
//   points - number of points to scan
var coversAllCorners = (buf, points, q) => {
    var margin = dist / 10;
    var corners = [false, false, false, false];

    for (var o = 0; o < points * pointSize; o += pointSize) {
        var x = buf.readDoubleLE(o);
        var y = buf.readDoubleLE(o + 8);

        if (x < q[0] + margin) {
            if (y < q[1] + margin) corners[0] = true;
            else if (y > q[3] - margin) corners[1] = true;
        }
        else if (x > q[2] - margin) {
            if (y < q[1] + margin) corners[2] = true;
            else if (y > q[3] - margin) corners[3] = true;
        }

        // Stop scanning as soon as every corner has been seen.
        if (corners.every((v) => v)) return true;
    }

    return false;
};

// Fetch the resource bounds, fire `num` random square queries of width
// `dist` (at most `threads` at a time), and report density statistics over
// the queries that had points in all four corners.
request(base + 'info')
.then((res) => {
    var info = JSON.parse(res);
    var bc = info.boundsConforming;

    // Valid range for a query's lower-left corner: the upper limits are
    // pulled in by `dist` so a query never extends past the dataset.
    var bounds = {
        minx: bc[0],
        miny: bc[1],
        maxx: bc[3] - dist,
        maxy: bc[4] - dist
    };
    bounds.width = bounds.maxx - bounds.minx;
    bounds.depth = bounds.maxy - bounds.miny;

    // NOTE(review): maxx/maxy already have `dist` subtracted, so this rejects
    // datasets narrower than 2 * dist. Kept as in the original.
    if (bounds.width < dist || bounds.depth < dist)
        throw new Error('Distance too large');

    var queries = [];
    for (var i = 0; i < num; ++i) {
        var q = [
            Math.round(bounds.minx + Math.random() * bounds.width),
            Math.round(bounds.miny + Math.random() * bounds.depth)
        ];
        queries.push(q.concat(q[0] + dist, q[1] + dist));
    }

    // Count of queries started so far, used only for progress output.
    var started = 0;

    var query = (q) => {
        if (++started % 10 === 0) console.log(started + ' / ' + num);

        var url = base + 'read?bounds=' + JSON.stringify(q) +
            '&schema=' + JSON.stringify(schema);
        var view = base + 'static' +
            '?offset=[' + q[0] + ',' + q[1] + ']' +
            '&bounds=[0,0,' + dist + ',' + dist + ']';

        // Only if we have points in all 4 outer corners of our query will we
        // include this response.
        return request({ url: url, encoding: null })
        .then((buf) => {
            if (buf.length < 4)
                throw new Error('Malformed read response from ' + url);

            // The point count is the trailing 4 bytes of the response.
            var points = buf.readUInt32LE(buf.length - 4);
            if (points * pointSize + 4 > buf.length)
                throw new Error('Truncated read response from ' + url);

            return {
                points: coversAllCorners(buf, points, q) ? points : 0,
                url: view
            };
        });
    };

    return Promise.map(queries, query, { concurrency: threads });
})
// Filter out non-conforming responses.
.then((samples) => samples.filter((v) => v.points))
.then((samples) => {
    console.log();
    console.log('Usable queries:', samples.length);

    // Without usable samples every statistic below would be NaN/undefined.
    if (samples.length === 0) {
        console.log('No query had points in all 4 corners; ' +
            'try a different -d or a larger -n.');
        return;
    }

    var points = samples.reduce((p, c) => p + c.points, 0);
    var div = Math.pow(dist, 2);    // Area of one query square.
    var ppSqUnit = samples.map((v) => v.points / div);

    console.log('Total points fetched:', points);
    console.log('Average points/unit:',
        (points / samples.length / div).toFixed(2));
    console.log('Standard dev points/unit:', stddev(ppSqUnit).toFixed(2));

    var max = { points: 0 };
    samples.forEach((v) => { if (v.points > max.points) max = v; });

    console.log();
    console.log('Densest view:', max.url);
    console.log('Densest sample points/unit:', (max.points / div).toFixed(2));
})
.catch((err) => {
    // Report request, parse and validation failures instead of leaving the
    // rejection unhandled, and signal failure through the exit status.
    console.error('Error:', (err && err.message) || err);
    process.exitCode = 1;
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ node sample.js -d 10 -n 1000 -t 12 -r data.greyhound.io/resource/dublin
10 / 1000
20 / 1000
...
990 / 1000
1000 / 1000
Usable queries: 482
Total points fetched: 4552796
Average points/unit: 94.46
Standard dev points/unit: 57.85
Densest view: http://data.greyhound.io/resource/dublin/static?offset=[-696592,7045943]&bounds=[0,0,10,10]
Densest sample points/unit: 412.01
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment