Created
November 14, 2024 01:04
-
-
Save Davis-3450/809164849d9f3769779abaab5bd4aaef to your computer and use it in GitHub Desktop.
Twitter caption scraper (JS console)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Paste into the browser console while a profile's timeline page is open.
// Evaluates to a Promise that resolves to an array of the unique tweet texts
// collected while auto-scrolling the page.
(async () => {
  // Consecutive scroll passes that may add nothing new before we stop.
  const MAX_IDLE_PASSES = 5;

  // Unique tweet texts gathered so far (de-duplicated by their trimmed text).
  const seenCaptions = new Set();

  // Resolve after `ms` milliseconds.
  const pause = (ms) => new Promise((done) => {
    setTimeout(done, ms);
  });

  // Scan the tweets currently in the DOM and record their text.
  const harvestVisibleCaptions = () => {
    // The timeline is the first div whose aria-label contains "Timeline:".
    let timeline = null;
    for (const candidate of document.querySelectorAll('div[aria-label]')) {
      if (candidate.getAttribute('aria-label').includes('Timeline:')) {
        timeline = candidate;
        break;
      }
    }

    if (!timeline) {
      console.warn("Timeline not found.");
      return;
    }

    for (const article of timeline.querySelectorAll('article[role="article"]')) {
      // The first div carrying a `lang` attribute is taken as the tweet body;
      // articles without one are skipped.
      const captionNode = article.querySelector('div[lang]');
      if (captionNode) {
        seenCaptions.add(captionNode.innerText.trim());
      }
    }
  };

  // Pick up whatever is already rendered before the first scroll.
  harvestVisibleCaptions();

  let idlePasses = 0;
  for (;;) {
    const sizeBefore = seenCaptions.size;

    // Jump (smoothly) to the current bottom of the page.
    window.scrollTo({
      top: document.body.scrollHeight,
      behavior: 'smooth',
    });

    // Give the page 2 seconds to render more tweets, then re-scan.
    await pause(2000);
    harvestVisibleCaptions();

    // Any growth resets the idle counter; otherwise count one more idle pass.
    const grew = seenCaptions.size > sizeBefore;
    idlePasses = grew ? 0 : idlePasses + 1;

    if (idlePasses >= MAX_IDLE_PASSES) {
      console.log("Reached the end or no new tweets are loading.");
      break;
    }

    // Extra 1-second breather between scroll passes.
    await pause(1000);
  }

  // Hand the results back as a plain array and echo them to the console.
  const tweets = [...seenCaptions];
  console.log("Collected Tweets:", tweets);
  return tweets;
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment