Last active
November 16, 2022 22:06
-
-
Save msramalho/38e516519e0ff94a9a878e4505ed5527 to your computer and use it in GitHub Desktop.
Download all replies to a tweet in a browser, by continuously scrolling on the page, using JavaScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Instructions:
 * Copy and paste this into a browser console on twitter.com, on a tweet's page.
 * Then keep scrolling until the end of the page; a console.log will show you how many tweets you have saved.
 * 5 seconds after you stop scrolling or reach the end, a file will be downloaded with all the tweets' info.
 */
/**
 * Extracts the fields of interest from one rendered tweet element.
 *
 * The lookup is purely positional: the first `<a>` is taken as the author
 * link, the first `div[dir='auto']` as the display name, the last
 * `div[dir='auto']` as the tweet text and the third `<a>` as the tweet link.
 * NOTE(review): these positions depend on the page's markup at the time the
 * script was written — confirm against the current DOM before relying on them.
 *
 * Elements that lack any of the required nodes yield `null` instead of
 * throwing, so callers can simply skip them.
 *
 * @param {Element} e - Candidate tweet element.
 * @returns {{user: string, name: string, datetime: string, tweet: string, tweet_id: string} | null}
 *   The extracted record, or `null` when the element is incomplete.
 */
let get_tweet = (e) => {
  if (e == null) {
    return null;
  }

  const textInstances = e.querySelectorAll("div[dir='auto']");
  const anchors = e.querySelectorAll("a");
  const time = e.querySelector("time");

  // Explicit guards replace the former catch-all try/catch: a missing text
  // node, fewer than three links, or no <time> means "not a complete tweet".
  if (textInstances.length === 0 || anchors.length < 3 || time == null) {
    return null;
  }

  return {
    user: anchors[0].href,
    name: textInstances[0].textContent,
    datetime: time.getAttribute("datetime"),
    tweet: textInstances[textInstances.length - 1].textContent,
    tweet_id: anchors[2].href,
  };
};
/**
 * Serialises `exportObj` as JSON and triggers a browser download of it.
 *
 * The payload is handed to the browser as a Blob-backed object URL rather
 * than a percent-encoded `data:` URI, so the export is not subject to URL
 * length limits when many tweets were collected.
 *
 * @param {*} exportObj - Any JSON-serialisable value.
 * @param {string} exportName - File name without extension; ".json" is appended.
 * @returns {void}
 */
function downloadObjectAsJson(exportObj, exportName) {
  const blob = new Blob([JSON.stringify(exportObj)], { type: "application/json" });
  // The object URL is intentionally not revoked: revoking right after click()
  // can cancel the download in some browsers, and browsers release object
  // URLs automatically when the document is unloaded.
  const objectUrl = URL.createObjectURL(blob);

  const downloadAnchorNode = document.createElement("a");
  downloadAnchorNode.setAttribute("href", objectUrl);
  downloadAnchorNode.setAttribute("download", exportName + ".json");
  document.body.appendChild(downloadAnchorNode); // required for firefox
  try {
    downloadAnchorNode.click();
  } finally {
    // Always detach the temporary anchor, even if click() throws.
    downloadAnchorNode.remove();
  }
}
// Scraper state. Kept at the top level so it can be inspected from the
// browser console while the scraper is running.
let uniqueTweets = [];
// Number of consecutive ticks that produced no new tweet.
let stagnant = 0;
// Manual pause switch: set `paused = true` in the console to suspend
// scrolling/collecting (the stagnation counter is frozen meanwhile), and back
// to `false` to resume. Previously the flag was always `true` and had no effect.
let paused = false;

/**
 * Appends to `collected` every candidate whose `tweet_id` is not present yet.
 *
 * @param {Array<{tweet_id: string} | null>} candidates - Extraction results;
 *   `null`/`undefined` entries (elements that could not be parsed) are skipped.
 * @param {Array<{tweet_id: string}>} collected - Accumulator, mutated in place.
 * @returns {Array<{tweet_id: string}>} The tweets that were newly appended, in order.
 */
function appendUnseenTweets(candidates, collected) {
  // Build the id index once per call instead of scanning `collected` for
  // every candidate.
  const seenIds = new Set(collected.map((tweet) => tweet.tweet_id));
  const added = [];
  for (const tweet of candidates) {
    if (tweet == null || seenIds.has(tweet.tweet_id)) {
      continue;
    }
    seenIds.add(tweet.tweet_id);
    collected.push(tweet);
    added.push(tweet);
  }
  return added;
}

// One tick every 100 ms: expand reply threads, harvest the tweets currently in
// the DOM, then scroll a little further. Stops and downloads the result after
// 50 consecutive ticks (5 seconds) without a new tweet.
let interval = setInterval(() => {
  if (paused) {
    return;
  }

  // click "Show replies"
  // Click once per tick rather than looping until the buttons disappear: if a
  // click does not remove the span synchronously, a loop here would never
  // return and would freeze the tab. Remaining buttons are retried next tick.
  const showRepliesButtons = Array.from(document.querySelectorAll("span")).filter((span) =>
    span.textContent.includes("Show replies")
  );
  if (showRepliesButtons.length > 0) {
    console.log(`clicking ${showRepliesButtons.length} buttons`);
    showRepliesButtons.forEach((showReplies) => showReplies.click());
  }

  // collect tweets
  const candidates = Array.from(document.querySelectorAll("[data-testid='tweet']"), (x) => get_tweet(x));
  const added = appendUnseenTweets(candidates, uniqueTweets);
  added.forEach((tweet) => console.log(tweet));

  stagnant = added.length > 0 ? 0 : stagnant + 1;

  if (stagnant >= 50) { // 5 seconds without new tweets
    console.log(`DONE found ${uniqueTweets.length} tweets`);
    clearInterval(interval);
    downloadObjectAsJson(uniqueTweets, `${uniqueTweets.length}_unique_tweets`);
    console.log(uniqueTweets);
    return; // finished: no further progress log or scrolling
  }

  console.log(`${stagnant}/50 stagnant ticks uniqueTweets=${uniqueTweets.length}`);
  window.scrollBy(0, 100);
}, 100);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is necessary because Twitter's UI destroys hidden tweets, so a single call to
querySelectorAll
will never return all of the replies; the collection has to be done iteratively while scrolling.