Skip to content

Instantly share code, notes, and snippets.

@jasoncrawford
Last active October 10, 2015 08:16
Show Gist options
  • Save jasoncrawford/83b488033aa571744701 to your computer and use it in GitHub Desktop.
Save jasoncrawford/83b488033aa571744701 to your computer and use it in GitHub Desktop.
Used to create a plot like this: https://plot.ly/~jasoncrawford/17.embed
var fs = require('fs');
var _ = require('underscore');
var csv = require('csv');
var Twitter = require('twitter');
// Reports a fatal error on stderr and terminates the process via process.exit(-1).
//
// error: optional value describing the failure. When it is truthy it is logged,
//        and its `stack` property is logged too if it has one. Calling die() with
//        no argument (or null) is allowed and just prints the 'fatal error' banner.
function die(error) {
  console.error('fatal error');
  if (error) {
    console.error('error', error);
    // Only look at `.stack` once we know `error` is not null/undefined;
    // reading a property off either of those throws and would mask the real failure.
    if (error.stack) console.error('stacktrace', error.stack);
  }
  process.exit(-1);
}
// Shorthand for the process environment; the Twitter credentials used further down are read from it.
var env = process.env;
//--------------------------------------------------------------------------------------------------
// Parsing
// Energy units understood by transformMatchData. `name` is the full unit name that
// the unit text from a tweet is prefix-matched against; `exponent` is the power of
// ten the parsed energy value is multiplied by.
var energyUnitMap = [
  { name: 'kilotons', exponent: 3 },
  { name: 'megatons', exponent: 6 },
  { name: 'gigatons', exponent: 9 }
];
// Turns the raw string captures for one tweet into a record with numeric fields.
//
// matchData: object keyed by the entries of patternKeys; every value is a string
//            captured from the tweet text.
// Returns a new object holding `name` and `url` unchanged, `distance`, `diameter`
// and `velocity` run through parseFloat, and `energy` multiplied by ten to the
// power of the matched unit's exponent. Returns undefined (after logging to
// stderr) when the energy unit is missing, empty, or not found in energyUnitMap.
function transformMatchData(matchData) {
  var data = _.pick(matchData, 'name', 'url');
  _.each(['distance', 'diameter', 'velocity', 'energy'], function (key) {
    data[key] = parseFloat(matchData[key]);
  });

  // Strip trailing dots (sentence period or "..." truncation) from the unit text.
  // String(... || '') keeps a missing capture from throwing on .replace().
  var energyUnits = String(matchData.energyUnits || '').replace(/\.+$/, '');

  // The unit text is matched as a prefix of a known unit name, so a singular or
  // cut-off form such as 'megaton' still resolves. An empty string is a prefix of
  // every name, so it must be rejected explicitly or it would silently be treated
  // as the first entry in the map.
  var unit;
  if (energyUnits.length > 0) {
    unit = _.find(energyUnitMap, function (entry) {
      return entry.name.indexOf(energyUnits) === 0;
    });
  }
  if (!unit) {
    console.error('unknown energy unit:', energyUnits);
    return undefined;
  }

  data.energy = data.energy * Math.pow(10, unit.exponent);
  return data;
}
// example:
// asteroid (2010 SX11) missed earth by 15 lunar distances: diameter ~47 m, velocity 7.73 km/s, energy ~1 megaton. http://t.co/VxWFWbZ6c8
//
// The capture groups are, in order, the fields listed in patternKeys. The link
// group accepts both http:// and https:// URLs so tweets carrying secure links
// are not rejected.
var pattern = /asteroid \((.*)\) missed earth by ([\d\.]+) lunar distances: diameter ~?([\d\.]+) m, velocity ([\d\.]+) km\/s, energy ~?([\d\.]+) (.*). (https?:\/\/.*)/;
// Field name for each capture group of `pattern`, in capture order.
var patternKeys = ['name', 'distance', 'diameter', 'velocity', 'energy', 'energyUnits', 'url'];
// Parses a single tweet object into a data record.
//
// tweet: object whose `text` is matched against `pattern` and whose `created_at`
//        is copied onto the result as `timestamp`.
// Returns the record produced by transformMatchData with `timestamp` added, or
// undefined when the text does not match (a warning is logged) or when
// transformMatchData itself yields nothing.
function parseTweet(tweet) {
  var body = tweet.text;
  var captures = body.match(pattern);

  if (captures) {
    // captures[0] is the whole match; the groups after it line up with patternKeys.
    var fields = _.object(patternKeys, captures.slice(1));
    var record = transformMatchData(fields);
    if (record) {
      record.timestamp = tweet.created_at;
    }
    return record;
  }

  console.warn('no match for tweet: ' + body);
  return undefined;
}
//--------------------------------------------------------------------------------------------------
// CSV output
// Record field -> CSV header label. Handed to csv.stringify as its `columns`
// option by generateCSV.
var columns = {
  name: 'Name',
  distance: 'Distance (lunar distances)',
  diameter: 'Diameter (m)',
  velocity: 'Velocity (km/s)',
  energy: 'Energy (tons)',
  url: 'URL'
};
// Path of the CSV file written by generateCSV.
var outfile = 'asteroids.csv';

// Writes the parsed tweet records to `outfile` as CSV with a header row.
//
// parsedTweets: array of records to serialize; the `columns` map is passed to
//               csv.stringify as the `columns` option.
// Returns nothing; the stringifier output is piped into a write stream.
function generateCSV(parsedTweets) {
  var destination = fs.createWriteStream(outfile);
  var options = {columns: columns, header: true};
  var rows = csv.stringify(parsedTweets, options);
  rows.pipe(destination);
}
//--------------------------------------------------------------------------------------------------
// Twitter & output
// Query parameters sent with the 'statuses/user_timeline' request below.
var params = {
  screen_name: 'AsteroidMisses',
  count: 200,
  trim_user: 1
};
// Callback for the timeline request: parses every tweet and writes the CSV.
//
// error:    passed to die() when truthy.
// tweets:   list of tweet objects; each one is run through parseTweet.
// response: unused here.
// Logs the number of tweets received and the number that failed to parse, then
// hands the successfully parsed records to generateCSV.
function handleResponse(error, tweets, response) {
  if (error) die(error);
  console.log('tweets:', tweets.length);

  // parseTweet returns undefined for tweets it cannot parse; compact drops those.
  var attempted = _.map(tweets, parseTweet);
  var succeeded = _.compact(attempted);
  console.warn('parse errors:', attempted.length - succeeded.length);

  generateCSV(succeeded);
}
// Twitter API client; all four credentials are read from environment variables.
var client = new Twitter({
  consumer_key: env.TWITTER_CONSUMER_KEY,
  consumer_secret: env.TWITTER_CONSUMER_SECRET,
  access_token_key: env.TWITTER_ACCESS_TOKEN,
  access_token_secret: env.TWITTER_ACCESS_TOKEN_SECRET
});

// Request the timeline described by `params`; handleResponse receives the result.
client.get('statuses/user_timeline', params, handleResponse);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment