Created
December 16, 2012 23:07
-
-
Save tylor/4314006 to your computer and use it in GitHub Desktop.
Scan your Twitter archive to get a basic count of the people you've talked to and the words that you've used.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Scan your official Twitter archive to get a basic count of the
 * people you've talked to and the words that you've used.
 *
 * $ node tweet-counts.js ~/path/to/tweets-archive/
 *
 */
var fs = require('fs'); | |
// The archive directory is the first command-line argument. Without it there
// is nothing to scan, so stop with a usage hint instead of failing with a
// TypeError on `undefined.replace`.
if (typeof process.argv[2] !== 'string' || process.argv[2] === '') {
  console.error('Usage: node tweet-counts.js ~/path/to/tweets-archive/');
  process.exit(1);
}
var base = process.argv[2].replace(/\/$/, ''); // Strip trailing slash.
// The evaluated monthly archive files assign their tweets onto
// Grailbird.data[<var_name>], so this object must exist before they are loaded.
var Grailbird = { data: {} };
// Running tallies for the whole archive. Every member except `total` is a
// counter object mapping a key (word, screen name, hashtag, source, hour or
// weekday index) to the number of times it was seen.
var statistics = {
  total: 0,     // Number of tweets scanned.
  text: {},     // Lower-cased word -> count.
  mentions: {}, // Lower-cased screen name -> count.
  hashtags: {}, // Lower-cased hashtag text -> count.
  sources: {},  // Lower-cased tweet source -> count.
  hourly: {},   // Date#getHours() value -> count.
  // Date#getDay() value (0 = Sunday .. 6 = Saturday) -> count. printDays reads
  // all seven indexes unconditionally, so each one starts at zero; otherwise a
  // weekday without tweets would be reported as "undefined NaN%".
  daily: { 0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 }
};
/**
 * Increment the tally stored under `key` in a counter object.
 *
 * @param {Object} statistic Plain object mapping keys to counts; mutated in place.
 * @param {string|number} key Bucket to increment. It is created with a count
 *     of 1 when the object has no own property of that name.
 */
function addOne(statistic, key) {
  // Only own properties are existing tallies. A key such as "constructor"
  // would otherwise resolve to a member inherited from Object.prototype and
  // turn the count into a string like "function Object() { ... }1".
  if (Object.prototype.hasOwnProperty.call(statistic, key)) {
    statistic[key] = (statistic[key] || 0) + 1;
    return;
  }
  // defineProperty creates a real own property even for "__proto__", where a
  // plain assignment of a number goes to the inherited setter and is dropped.
  Object.defineProperty(statistic, key, {
    value: 1,
    writable: true,
    enumerable: true,
    configurable: true
  });
}
// Load the archive index. The index file is evaluated as a script and is
// expected to define `tweet_index`, which the loop below iterates over.
// SECURITY: eval() runs whatever the archive files contain with this process's
// privileges. Only point this script at an archive you trust.
eval(fs.readFileSync(base + '/data/js/tweet_index.js').toString());
tweet_index.forEach(function(tweet_file) {
  // Evaluate one archive file from the index; it is expected to store its
  // tweets on Grailbird.data[tweet_file.var_name]. The security note above
  // applies here as well.
  eval(fs.readFileSync(base + '/' + tweet_file.file_name).toString());
  Grailbird.data[tweet_file.var_name].forEach(function(tweet) {
    // Total.
    statistics.total++;
    // Text. Splitting on whitespace yields an empty token for empty text or
    // leading/trailing whitespace; those are not words, so skip them.
    tweet.text.split(/\s+/).forEach(function(word) {
      if (word !== '') {
        addOne(statistics.text, word.toLowerCase());
      }
    });
    // Mentions.
    tweet.entities.user_mentions.forEach(function(user) {
      addOne(statistics.mentions, user.screen_name.toLowerCase());
    });
    // Hashtags.
    tweet.entities.hashtags.forEach(function(hashtag) {
      addOne(statistics.hashtags, hashtag.text.toLowerCase());
    });
    // Sources.
    addOne(statistics.sources, tweet.source.toLowerCase());
    // Hourly and Daily. getHours()/getDay() report the local time zone of the
    // machine running the script.
    var created_at = new Date(tweet.created_at);
    addOne(statistics.hourly, created_at.getHours());
    addOne(statistics.daily, created_at.getDay());
  });
});
/**
 * Format a count as a percentage of `statistics.total`, without the '%' sign.
 *
 * Shares below 99.5% keep two significant digits. From 99.5% upwards
 * toPrecision(2) would switch to exponent notation ("1.0e+2"), so those are
 * printed as a rounded whole number instead. With no tweets at all the share
 * is reported as "0.0" rather than "NaN".
 *
 * @param {number} count Number of occurrences.
 * @return {string} The formatted percentage.
 */
function formatPercent(count) {
  if (!statistics.total) {
    return '0.0';
  }
  var percent = count * 100 / statistics.total;
  if (percent >= 99.5) {
    return String(Math.round(percent));
  }
  return percent.toPrecision(2);
}

/**
 * Print every entry whose count is strictly greater than `threshold`, one per
 * line as "<count> <percent>% <key>", ordered from least to most frequent.
 *
 * @param {Object} statistic Counter object mapping keys to counts.
 * @param {number} threshold Entries with a count at or below this are skipped.
 */
function printSortedByValues(statistic, threshold) {
  Object.keys(statistic)
    // Drop the entries that will not be printed before sorting the rest.
    .filter(function(key) { return statistic[key] > threshold; })
    .sort(function(a, b) { return statistic[a] - statistic[b]; })
    .forEach(function(key) {
      console.log(statistic[key] + ' ' + formatPercent(statistic[key]) + '% ' + key);
    });
}

/**
 * Print every entry as "<key> <count> <percent>%" in Object.keys() order.
 * No explicit sort is performed: integer-like keys such as hours are
 * enumerated in ascending numeric order by the engine, other keys in
 * insertion order.
 *
 * @param {Object} statistic Counter object mapping keys to counts.
 */
function printSortedByKeys(statistic) {
  Object.keys(statistic).forEach(function(key) {
    console.log(key + ' ' + statistic[key] + ' ' + formatPercent(statistic[key]) + '%');
  });
}

/**
 * Print one "<count> <percent>% <weekday>" line per day, Sunday first.
 *
 * @param {Object} statistic Counter object keyed by Date#getDay() values (0-6).
 */
function printDays(statistic) {
  var days = ['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'];
  for (var day = 0; day < days.length; day++) {
    // A weekday with no tweets has no entry; report it as zero instead of
    // printing "undefined NaN%".
    var count = statistic[day] || 0;
    console.log(count + ' ' + formatPercent(count) + '% ' + days[day]);
  }
}
// Report sections in output order: a heading paired with the call that prints
// its rows. Sections are separated by a single blank line.
[
  ['Popular words:', function() { printSortedByValues(statistics.text, 10); }],
  ['Who you talk to:', function() { printSortedByValues(statistics.mentions, 2); }],
  ['Hashtags used:', function() { printSortedByValues(statistics.hashtags, 1); }],
  ['Tweet sources:', function() { printSortedByValues(statistics.sources, 0); }],
  ['What time of the day:', function() { printSortedByKeys(statistics.hourly); }],
  ['What day of the week:', function() { printDays(statistics.daily); }]
].forEach(function(section, position) {
  var heading = section[0];
  var printRows = section[1];
  // Blank separator before every section except the first.
  if (position > 0) {
    console.log('');
  }
  console.log(heading);
  printRows();
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment