Skip to content

Instantly share code, notes, and snippets.

@msramalho
Last active November 16, 2022 22:06
Show Gist options
  • Save msramalho/38e516519e0ff94a9a878e4505ed5527 to your computer and use it in GitHub Desktop.
Save msramalho/38e516519e0ff94a9a878e4505ed5527 to your computer and use it in GitHub Desktop.
Download all replies to a tweet in a browser, by continuously scrolling on the page, using JavaScript
/**
* Instructions:
* Copy and paste this into the browser console on twitter.com, on a tweet's page.
* Then keep scrolling until the end of the page, a console.log will show you how many tweets you have saved
* 5 seconds after you stop scrolling or reach the end, a file will be downloaded with all the tweet's info
*/
/**
 * Reads the fields of interest out of one tweet element.
 *
 * @param {Element} e - Element to query (the caller passes nodes matching
 *     `[data-testid='tweet']`).
 * @returns {{user: string, name: string, datetime: string, tweet: string, tweet_id: string} | null}
 *     The extracted record, or `null` when any of the expected child nodes
 *     is missing (the lookup throws and the error is deliberately discarded).
 */
let get_tweet = (e) => {
    try {
        const autoDirNodes = e.querySelectorAll("div[dir='auto']");
        // First link inside the element.
        const user = e.querySelector("a").href;
        // First `div[dir='auto']` inside the element.
        const name = e.querySelector("div[dir='auto']").textContent;
        const datetime = e.querySelector("time").getAttribute("datetime");
        // Last `div[dir='auto']` inside the element.
        const tweet = autoDirNodes[autoDirNodes.length - 1].textContent;
        // Third link inside the element; its href is used as the unique id.
        const tweet_id = e.querySelectorAll("a")[2].href;
        return { user, name, datetime, tweet, tweet_id };
    } catch (error) {
        // Best effort: anything that does not have the expected shape is skipped.
        return null;
    }
}
/**
 * Makes the browser download `exportObj` as a JSON file.
 *
 * The value is serialised with JSON.stringify, URI-encoded into a `data:` URL
 * and offered for download through a temporary anchor element that is
 * clicked programmatically and then removed again.
 *
 * @param {*} exportObj - Value to serialise.
 * @param {string} exportName - File name without extension; ".json" is appended.
 */
function downloadObjectAsJson(exportObj, exportName) {
    const payload = encodeURIComponent(JSON.stringify(exportObj));
    const link = document.createElement('a');
    link.setAttribute("href", `data:text/json;charset=utf-8,${payload}`);
    link.setAttribute("download", `${exportName}.json`);
    // The anchor has to be attached to the document for the click to work in Firefox.
    document.body.appendChild(link);
    link.click();
    link.remove();
}
// Replies collected so far, in discovery order (one entry per distinct tweet_id).
let uniqueTweets = [];
// Number of consecutive ticks that added nothing to `uniqueTweets`.
let stagnant = 0;
// NOTE: despite its name, ticks only do work while this is truthy. Nothing in
// the script ever clears it, so it only acts as a manual switch: assign
// `paused = false` in the console to suspend collection, `true` to resume.
let paused = true;

// Ids already stored in `uniqueTweets`, so the duplicate check is O(1).
const seenTweetIds = new Set();
// Delay between two ticks, in milliseconds.
const TICK_INTERVAL_MS = 100;
// 50 ticks * 100 ms = 5 seconds without new tweets ends the run.
const STAGNANT_TICK_LIMIT = 50;

/**
 * Clicks every span whose text contains "Show replies", once.
 *
 * This is a single pass on purpose: re-querying in a loop until no span
 * matches never terminates when a matching span survives the click (the page
 * cannot re-render while synchronous code is running), which freezes the tab.
 * Spans that are still present are simply clicked again on the next tick.
 *
 * @returns {number} How many spans were clicked.
 */
function clickShowRepliesButtons() {
    const buttons = Array.from(document.querySelectorAll("span"))
        .filter((span) => span.textContent.includes("Show replies"));
    if (buttons.length > 0) {
        console.log(`clicking ${buttons.length} buttons`);
        buttons.forEach((button) => button.click());
    }
    return buttons.length;
}

/**
 * Stores every tweet currently in the DOM that has not been stored before.
 *
 * @returns {number} How many tweets were added to `uniqueTweets`.
 */
function collectVisibleTweets() {
    let added = 0;
    for (const element of Array.from(document.querySelectorAll("[data-testid='tweet']"))) {
        const tweet = get_tweet(element);
        if (tweet === null || seenTweetIds.has(tweet.tweet_id)) {
            continue;
        }
        seenTweetIds.add(tweet.tweet_id);
        console.log(tweet);
        uniqueTweets.push(tweet);
        added += 1;
    }
    return added;
}

/**
 * One iteration of the scraper: expand replies, collect tweets, then either
 * finish (after STAGNANT_TICK_LIMIT ticks without anything new) or scroll
 * further down so that more replies get rendered.
 */
function scrapeTick() {
    if (!paused) {
        return;
    }
    clickShowRepliesButtons();
    const added = collectVisibleTweets();
    stagnant = added > 0 ? 0 : stagnant + 1;
    if (stagnant >= STAGNANT_TICK_LIMIT) {
        console.log(`DONE found ${uniqueTweets.length} tweets`);
        clearInterval(interval);
        downloadObjectAsJson(uniqueTweets, `${uniqueTweets.length}_unique_tweets`);
        console.log(uniqueTweets);
        // Finished: no point in logging progress or scrolling any further.
        return;
    }
    console.log(`${stagnant}/${STAGNANT_TICK_LIMIT} stagnant ticks uniqueTweets=${uniqueTweets.length}`);
    window.scrollBy(0, 100);
}

// Handle of the running timer; cleared by `scrapeTick` once the run is done.
let interval = setInterval(scrapeTick, TICK_INTERVAL_MS);
@msramalho
Copy link
Author

This is necessary because Twitter's UI destroys tweets that are scrolled out of view, so a single querySelectorAll call will never return all of the replies; they have to be collected iteratively while scrolling.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment