Skip to content

Instantly share code, notes, and snippets.

@Davis-3450
Created November 14, 2024 01:04
Show Gist options
  • Save Davis-3450/809164849d9f3769779abaab5bd4aaef to your computer and use it in GitHub Desktop.
Save Davis-3450/809164849d9f3769779abaab5bd4aaef to your computer and use it in GitHub Desktop.
Twitter caption scraper (JS console)
// Tweet caption scraper. Run this in the browser console on any profile's
// timeline page. It scrolls the page repeatedly, collects the text of every
// tweet it finds in the DOM, logs the list, and resolves with it (an array of
// unique, trimmed caption strings).
(async function () {
  // Tunables, gathered in one place so they can be adjusted before pasting.
  const maxScrollAttempts = 5; // Consecutive scrolls without new tweets before stopping
  const loadDelayMs = 2000; // Wait after each scroll so new tweets can load
  const pauseDelayMs = 1000; // Extra pause between scrolls to slow the loop down

  // Unique caption texts collected so far. Deduplication is by text, so two
  // tweets with identical text are stored once.
  const tweetsSet = new Set();

  /**
   * Pause for the given time.
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>} Resolves once the delay has elapsed.
   */
  function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Add the caption of every tweet currently present in the timeline to
   * `tweetsSet`. Logs a warning and does nothing when no timeline is found.
   */
  function extractTweets() {
    // The timeline is the first div whose aria-label contains "Timeline:".
    const timeline = [...document.querySelectorAll('div[aria-label]')].find((el) =>
      el.getAttribute('aria-label').includes('Timeline:')
    );
    if (!timeline) {
      console.warn("Timeline not found.");
      return;
    }

    for (const article of timeline.querySelectorAll('article[role="article"]')) {
      // The caption lives in the first div carrying a `lang` attribute.
      const textNode = article.querySelector('div[lang]');
      if (!textNode) {
        continue;
      }
      // innerText can be empty after trimming (e.g. the node renders no text);
      // skip those so the result never contains blank captions.
      const caption = textNode.innerText.trim();
      if (caption !== '') {
        tweetsSet.add(caption);
      }
    }
  }

  // Initial extraction of whatever is already on screen.
  extractTweets();

  // Scroll-and-extract loop: stops after `maxScrollAttempts` consecutive
  // scrolls that yield no new captions.
  let scrollAttempts = 0;
  while (true) {
    const prevTweetCount = tweetsSet.size;

    // NOTE(review): this jumps straight to the bottom of the page; if the site
    // removes off-screen tweets from the DOM, tweets in between could be
    // missed — confirm against the live page.
    window.scrollTo({
      top: document.body.scrollHeight,
      behavior: 'smooth',
    });

    await sleep(loadDelayMs);
    extractTweets();

    if (tweetsSet.size > prevTweetCount) {
      // New captions arrived, so the stall counter starts over.
      scrollAttempts = 0;
    } else {
      scrollAttempts++;
      if (scrollAttempts >= maxScrollAttempts) {
        console.log("Reached the end or no new tweets are loading.");
        break;
      }
    }

    await sleep(pauseDelayMs);
  }

  // Convert the Set to an Array for easier handling by the caller.
  const tweets = Array.from(tweetsSet);
  console.log("Collected Tweets:", tweets);
  return tweets;
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment