Created
July 5, 2023 14:09
-
-
Save samhenrigold/a10478246e5e69a501868ca532c31843 to your computer and use it in GitHub Desktop.
Node script to generate a cleaned, sorted list of your tweets. Drop this into the base directory of your unzipped Twitter archive & run from there.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const TWEETS_PATH = './data/tweets.js'; | |
/**
 * Read a tweet data file and parse it into JavaScript values.
 *
 * The data file is not plain JSON: it is a script that assigns a JSON value
 * to a global, e.g. `window.YTD.tweets.part0 = [ ... ]`. That assignment
 * prefix is stripped before parsing. The prefix is matched by pattern rather
 * than by one exact string, so a different data-set name or part number, or
 * different spacing around `=`, is handled as well. A file that already
 * contains bare JSON is parsed as-is.
 *
 * @param {string} filePath - Path of the data file to read.
 * @returns {*} The parsed JSON value (an array of tweet records for tweets.js).
 * @throws {Error} If the file cannot be read (from `fs.readFileSync`).
 * @throws {SyntaxError} If the remaining text is not valid JSON.
 */
function readAndParseTweets(filePath) {
  const rawData = fs.readFileSync(filePath, 'utf8');
  // Anchored at the start so text inside a tweet can never be touched.
  // The leading `\s*` also swallows a UTF-8 byte-order mark, which
  // JSON.parse would otherwise reject.
  const assignmentPrefix = /^\s*(?:window\.YTD\.\w+\.part\d+\s*=\s*)?/;
  const jsonData = rawData.replace(assignmentPrefix, '');
  return JSON.parse(jsonData);
}
/**
 * Build a cleaned copy of every tweet record.
 *
 * Each input element is expected to wrap the tweet under a `tweet` property.
 * For every tweet the function:
 *   1. drops a fixed list of top-level fields,
 *   2. strips per-media cruft (via `removeMediaCruft`) from
 *      `extended_entities.media` and `entities.media` when present,
 *   3. removes every nested `indices` property (via `removeIndices`).
 *
 * The input is never modified: each tweet is deep-copied first, because the
 * clean-up steps delete properties on nested objects.
 *
 * @param {Array<{tweet: Object}>} tweets - Records as parsed from the archive.
 * @returns {Object[]} New, cleaned tweet objects in the same order.
 */
function cleanTweetData(tweets) {
  // Top-level fields that are dropped from every tweet.
  const fieldsToRemove = [
    'display_text_range',
    'edit_info',
    'favorited',
    'id_str',
    'lang',
    'possibly_sensitive',
    'retweeted',
    'source',
    'truncated',
  ];

  // Clean the media list of one entity container, if it has one.
  // Tweets without media simply have no `media` array, so that case is
  // checked explicitly instead of being caught as an exception.
  const cleanMediaList = (container) => {
    const mediaList = container ? container.media : undefined;
    if (!Array.isArray(mediaList)) {
      return;
    }
    mediaList.forEach((media) => {
      if (media !== null && typeof media === 'object') {
        removeMediaCruft(media);
      }
    });
  };

  return tweets.map((record) => {
    const original = record.tweet;
    // The data originates from JSON, so a JSON round-trip is a faithful deep
    // copy; it keeps the nested deletes below from leaking into the input.
    const cleanedTweet = original == null ? {} : JSON.parse(JSON.stringify(original));

    fieldsToRemove.forEach((field) => delete cleanedTweet[field]);

    cleanMediaList(cleanedTweet.extended_entities);
    cleanMediaList(cleanedTweet.entities);

    // Remove any 'indices' property. This is only used for display purposes.
    removeIndices(cleanedTweet);
    return cleanedTweet;
  });
}
/**
 * Remove unnecessary fields from a media object, in place.
 *
 * Currently only the `sizes` property is removed; every other property is
 * left as it is.
 *
 * @param {Object} media - Media object to strip; modified in place.
 * @returns {void}
 */
function removeMediaCruft(media) {
  delete media.sizes;
}
/**
 * Recursively remove every property named `indices` from an object, in place.
 *
 * Arrays are walked as well, so `indices` properties on objects nested inside
 * arrays are also removed. Values that are not objects (including `null`) are
 * ignored.
 *
 * @param {*} obj - Value to strip; objects and arrays are modified in place.
 * @returns {void}
 */
function removeIndices(obj) {
  // `typeof null` is 'object', so null needs its own check.
  if (obj === null || typeof obj !== 'object') {
    return;
  }
  for (const prop in obj) {
    if (prop === 'indices') {
      delete obj[prop];
    } else if (typeof obj[prop] === 'object') {
      removeIndices(obj[prop]);
    }
  }
}
/**
 * Sort an array of objects in place by a numeric key, largest value first.
 *
 * Values are coerced with `Number()`, so numeric strings such as "12" are
 * compared as numbers. Items whose value is missing or not numeric are placed
 * after all numeric ones instead of producing a NaN comparison result, which
 * would make the resulting order unpredictable.
 *
 * @param {Object[]} array - Objects to sort; the array itself is reordered.
 * @param {string} key - Name of the property to sort by.
 * @returns {Object[]} The same array instance, sorted in descending order.
 */
function sortByKeyDesc(array, key) {
  // Map an item to a comparable number; non-numeric values rank lowest.
  const rankOf = (item) => {
    const value = Number(item[key]);
    return Number.isNaN(value) ? -Infinity : value;
  };

  return array.sort((a, b) => {
    const rankA = rankOf(a);
    const rankB = rankOf(b);
    // Compare instead of subtracting: (-Infinity) - (-Infinity) is NaN.
    if (rankA === rankB) {
      return 0;
    }
    return rankB > rankA ? 1 : -1;
  });
}
// Main execution: read the archive, clean each tweet, then rank the result.
const rawTweets = readAndParseTweets(TWEETS_PATH);
const cleanedTweets = cleanTweetData(rawTweets);
// Ranked by `favorite_count`; `retweet_count` can be used here instead.
const sortedTweets = sortByKeyDesc(cleanedTweets, 'favorite_count');

// Write sorted tweets to a JSON file (2-space indented) in the current directory.
fs.writeFileSync('tweets.json', JSON.stringify(sortedTweets, null, 2));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment