Davis-3450 · November 14, 2024 01:04
diff --git a/captionScraper.js b/captionScraper.js
 // Paste into the browser console while a profile's timeline page is open.
 //
 // Repeatedly scrolls to the bottom of the page and gathers the text of every
 // tweet currently rendered inside the timeline container. Texts are kept in a
 // Set, so tweets with identical text are stored once. The run ends after five
 // consecutive scroll rounds that add nothing new; the collected texts are then
 // logged and used as the resolved value of the async function.
 (async function() {
    // Consecutive fruitless scroll rounds tolerated before giving up.
    const STALL_LIMIT = 5;
    // Trimmed text of every tweet seen so far.
    const seenTexts = new Set();

    // Resolves after `ms` milliseconds.
    const pause = (ms) => new Promise((done) => setTimeout(done, ms));

    // Adds the text of each tweet currently in the DOM to `seenTexts`.
    // Warns and does nothing else when no timeline container is present.
    function collectVisible() {
        // The timeline is the first labelled div whose label mentions "Timeline:".
        let container;
        for (const candidate of document.querySelectorAll('div[aria-label]')) {
            if (candidate.getAttribute('aria-label').includes('Timeline:')) {
                container = candidate;
                break;
            }
        }

        if (!container) {
            console.warn("Timeline not found.");
            return;
        }

        for (const article of container.querySelectorAll('article[role="article"]')) {
            // The tweet body is the first div carrying a `lang` attribute.
            const textNode = article.querySelector('div[lang]');
            if (textNode) {
                seenTexts.add(textNode.innerText.trim());
            }
        }
    }

    // Pick up whatever is already rendered before the first scroll.
    collectVisible();

    let stalls = 0;
    for (;;) {
        const sizeBefore = seenTexts.size;

        window.scrollTo({
            top: document.body.scrollHeight,
            behavior: 'smooth'
        });

        // Give the page time to load more tweets before looking again.
        await pause(2000);
        collectVisible();

        // Any growth resets the stall counter; otherwise count another miss.
        stalls = seenTexts.size > sizeBefore ? 0 : stalls + 1;
        if (stalls >= STALL_LIMIT) {
            console.log("Reached the end or no new tweets are loading.");
            break;
        }

        // Extra breathing room between scroll rounds.
        await pause(1000);
    }

    const collected = [...seenTexts];
    console.log("Collected Tweets:", collected);
    return collected;
 })();
	// Browser-console script: run it on a profile's timeline page.
	//
	// Scrolls to the page bottom in rounds, collecting the trimmed text of each
	// tweet found in the timeline into a Set (duplicates by text collapse). Once
	// five rounds in a row yield no new text, it logs the collected array and
	// resolves the async function with it.
	(async function() {
		const collectedTexts = new Set();
		// How many rounds in a row may add nothing before the loop stops.
		const maxIdleRounds = 5;

		// Promise-based delay of `ms` milliseconds.
		const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

		// First div whose aria-label contains "Timeline:", or undefined.
		const findTimeline = () =>
			Array.from(document.querySelectorAll('div[aria-label]')).find((node) =>
				node.getAttribute('aria-label').includes('Timeline:')
			);

		// Records the text of every tweet article currently under the timeline.
		const harvest = () => {
			const timeline = findTimeline();
			if (!timeline) {
				console.warn("Timeline not found.");
				return;
			}

			timeline.querySelectorAll('article[role="article"]').forEach((article) => {
				// Tweet text lives in the first descendant div with a `lang` attribute.
				const body = article.querySelector('div[lang]');
				if (body) {
					collectedTexts.add(body.innerText.trim());
				}
			});
		};

		// Capture tweets that are on screen before any scrolling happens.
		harvest();

		let idleRounds = 0;
		while (true) {
			const countBefore = collectedTexts.size;

			window.scrollTo({
				top: document.body.scrollHeight,
				behavior: 'smooth'
			});

			// Wait for the page to load further tweets, then re-scan.
			await wait(2000);
			harvest();

			if (collectedTexts.size > countBefore) {
				// Progress was made, so start counting idle rounds afresh.
				idleRounds = 0;
			} else if (++idleRounds >= maxIdleRounds) {
				console.log("Reached the end or no new tweets are loading.");
				break;
			}

			// Additional delay so consecutive scrolls are spaced out.
			await wait(1000);
		}

		const tweets = Array.from(collectedTexts);
		console.log("Collected Tweets:", tweets);
		return tweets;
	})();
No results found