Skip to content

Instantly share code, notes, and snippets.

@connormanning
Last active July 21, 2017 18:08
Show Gist options
  • Save connormanning/6afc89825c87d6263280ee456212084e to your computer and use it in GitHub Desktop.
Save connormanning/6afc89825c87d6263280ee456212084e to your computer and use it in GitHub Desktop.
Greyhound density sampler
var Promise = require('bluebird');
var request = require('request-promise');
var argv = require('minimist')(process.argv.slice(2));
// Print the usage text and stop when no usable resource was supplied or when
// help was requested with -h.
//
// minimist yields `true` for a flag given without a value, so `-r` must be a
// non-empty string to be usable as a URL.
if (typeof argv.r !== 'string' || !argv.r || argv.h) {
    console.log('\nGREYHOUND DENSITY SAMPLER');
    console.log('--------');
    console.log(
        'This script queries randomly selected areas of a resource to\n' +
        'estimate the density of a dataset. Only queries containing\n' +
        'points in all 4 corners of the query bounds (calculated using\n' +
        'the outer 10% of the query width) are considered in this\n' +
        'analysis');

    console.log('\n\nOptions\n--------');
    console.log('-r <resource>');
    console.log('\tURL base of the resource to sample.');
    console.log('\tFor example: data.greyhound.io/resource/dublin\n');

    console.log('-d <query width>');
    console.log('\tWidth of each query: `-d 10` will query 10x10 unit squares');
    console.log('\n\tDefault: 10\n');

    console.log('-n <number of queries>');
    console.log('\tTotal number of random query samples');
    console.log('\n\tDefault: 100\n');

    console.log('-t <threads>');
    console.log('\tNumber of concurrent queries to run');
    console.log('\n\tDefault: 8\n');

    console.log('\n\nDependencies\n--------');
    console.log('$ npm install bluebird request-promise minimist\n\n');

    // Asking for help is a success; a missing or unusable -r is a usage
    // error and must be visible to calling shells/scripts.
    process.exit(argv.h ? 0 : 1);
}
// Base URL of the resource to sample, normalized so that it carries an
// explicit scheme and ends with a slash (endpoint names are appended to it).
var base = argv.r;
// Anchor the check on a full "http://" or "https://" prefix: comparing only
// the first four characters would mistake a host such as "httpbin.org" for a
// URL that already has a scheme.
if (!/^https?:\/\//i.test(base)) base = 'http://' + base;
if (base.slice(-1) !== '/') base = base + '/';

// Side length of each square query (-d), number of random queries (-n), and
// number of queries kept in flight at once (-t).
var dist = argv.d || 10;
var num = argv.n || 100;
var threads = argv.t || 8;

// Dimensions requested for every point: X, Y and Z as 8-byte floating values.
var schema = [
    { name: 'X', type: 'floating', size: 8 },
    { name: 'Y', type: 'floating', size: 8 },
    { name: 'Z', type: 'floating', size: 8 }
];

// Bytes per point in a response; derived from the schema so the two cannot
// drift apart (24 for the schema above).
var pointSize = schema.reduce((bytes, dim) => bytes + dim.size, 0);

// NOTE(review): neither of these is referenced by the visible code (the
// `results` used further down are callback parameters) - kept in case other
// code relies on them.
var responses = 0;
var results = [];
/**
 * Population standard deviation of a list of numbers: the square root of the
 * mean squared distance from the arithmetic mean.
 *
 * @param {number[]} a - Values to measure.
 * @returns {number} The standard deviation; NaN when `a` is empty.
 */
function stddev(a) {
    var count = a.length;

    var total = 0;
    a.forEach((value) => { total += value; });
    var mean = total / count;

    var squaredError = 0;
    a.forEach((value) => { squaredError += Math.pow(value - mean, 2); });

    return Math.sqrt(squaredError / count);
}
// Main pipeline:
//   1. fetch the resource's `info` document and read its conforming bounds,
//   2. issue `num` randomly placed `dist` x `dist` read queries, at most
//      `threads` at a time,
//   3. keep only the queries that had points in all four corners, and
//   4. print density statistics for those queries.
// Any failure along the way is reported on stderr with a non-zero exit status.
request(base + 'info')
    .then((res) => {
        var info = JSON.parse(res);
        var bc = info.boundsConforming;

        // Indices 0/1 are used as the minimum x/y and 3/4 as the maximum x/y.
        // The maximums are pulled in by `dist` so that a square whose lower
        // corner is drawn from this range does not extend past them.
        var bounds = {
            minx: bc[0],
            miny: bc[1],
            maxx: bc[3] - dist,
            maxy: bc[4] - dist
        };
        bounds.width = bounds.maxx - bounds.minx;
        bounds.depth = bounds.maxy - bounds.miny;

        // NOTE(review): width/depth already have `dist` subtracted, so this
        // rejects datasets narrower than 2 * dist - confirm that is intended.
        if (bounds.width < dist || bounds.depth < dist)
            throw new Error('Distance too large');

        // Each query is [minx, miny, maxx, maxy] with a randomly chosen,
        // integer-rounded lower corner.
        var queries = [];
        for (var n = 0; n < num; ++n) {
            var q = [
                Math.round(bounds.minx + Math.random() * bounds.width),
                Math.round(bounds.miny + Math.random() * bounds.depth)
            ];
            queries.push(q.concat(q[0] + dist, q[1] + dist));
        }

        // Number of queries started so far, for progress output.
        var started = 0;

        // A "corner" is the outer 10% of the query width on each axis.
        var margin = dist / 10;

        // Runs one read query and resolves to { points, url }, where `points`
        // is 0 unless every corner of the query contained at least one point.
        var query = (q) => {
            if (++started % 10 === 0) console.log(started + ' / ' + num);

            var url = base + 'read?bounds=' + JSON.stringify(q) +
                '&schema=' + JSON.stringify(schema);

            // Link reported for the densest sample.
            var view = base + 'static' +
                '?offset=[' + q[0] + ',' + q[1] + ']' +
                '&bounds=[0,0,' + dist + ',' + dist + ']';

            // Only if we have points in all 4 outer corners of our query will
            // we include this response.
            return request({ url: url, encoding: null })
                .then((buf) => {
                    // The last 4 bytes hold the point count (little-endian
                    // uint32); the points are read from the start of the
                    // buffer, `pointSize` bytes each, with X at offset 0 and
                    // Y at offset 8 as little-endian doubles.
                    var points = buf.readUInt32LE(buf.length - 4);
                    var corners = [false, false, false, false];

                    for (var o = 0; o < points * pointSize; o += pointSize) {
                        var x = buf.readDoubleLE(o),
                            y = buf.readDoubleLE(o + 8);

                        if (x < q[0] + margin) {
                            if (y < q[1] + margin) corners[0] = true;
                            else if (y > q[3] - margin) corners[1] = true;
                        }
                        else if (x > q[2] - margin) {
                            if (y < q[1] + margin) corners[2] = true;
                            else if (y > q[3] - margin) corners[3] = true;
                        }

                        // Stop scanning as soon as every corner is covered.
                        if (corners.every((v) => v)) {
                            return { points: points, url: view };
                        }
                    }

                    return { points: 0, url: view };
                });
        };

        return Promise.map(queries, query, { concurrency: threads });
    })
    // Filter out non-conforming responses.
    .then((results) => results.filter((v) => v.points))
    .then((results) => {
        var points = results.reduce((p, c) => p + c.points, 0);

        console.log();
        console.log('Usable queries:', results.length);
        console.log('Total points fetched:', points);

        // Without a single usable sample the averages below would be NaN and
        // there would be no densest view to report.
        if (results.length === 0) {
            console.log(
                'No query had points in all 4 corners; ' +
                'try a different -d or a larger -n.');
            return;
        }

        // Area of one query, used to turn point counts into densities.
        var div = Math.pow(dist, 2);
        var ppSqUnit = results.map((v) => v.points / div);

        console.log('Average points/unit:',
            (points / results.length / div).toFixed(2));
        console.log('Standard dev points/unit:', stddev(ppSqUnit).toFixed(2));

        var max = { points: 0 };
        results.forEach((v) => { if (v.points > max.points) max = v; });

        console.log();
        console.log('Densest view:', max.url);
        console.log('Densest sample points/unit:', (max.points / div).toFixed(2));
    })
    // Report request, parse and validation failures instead of leaving the
    // rejection unhandled, and make the failure visible in the exit status.
    .catch((err) => {
        console.error('Sampling failed:', err && err.message ? err.message : err);
        process.exitCode = 1;
    });
$ node sample.js -d 10 -n 1000 -t 12 -r data.greyhound.io/resource/dublin
10 / 1000
20 / 1000
...
990 / 1000
1000 / 1000
Usable queries: 482
Total points fetched: 4552796
Average points/unit: 94.46
Standard dev points/unit: 57.85
Densest view: http://data.greyhound.io/resource/dublin/static?offset=[-696592,7045943]&bounds=[0,0,10,10]
Densest sample points/unit: 412.01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment