Skip to content

Instantly share code, notes, and snippets.

@udaylunawat
Last active August 26, 2024 06:19
Show Gist options
  • Save udaylunawat/54e67ed556c1286fcc7f54d3294d9ce7 to your computer and use it in GitHub Desktop.
Save udaylunawat/54e67ed556c1286fcc7f54d3294d9ce7 to your computer and use it in GitHub Desktop.
Save Instagram post URLs as a CSV file using Chrome Developer Tools
// Entry point: runs the whole collection pipeline as soon as the script is
// evaluated. Each phase is awaited in turn, so the page is only ever being
// scrolled by one phase at a time.
(async function executeInstagramPostCollection() {
    console.log("Starting Instagram post collection...");

    // Phase 1: walk down to the bottom of the page so that all content gets loaded.
    await scrollToBottomAndLoadContent();

    // Phase 2: jump back to the top before harvesting.
    await scrollToTop();

    // Phase 3: scroll down again, gathering unique post URLs (a Set de-duplicates them).
    const collectedPostUrls = new Set();
    await scrollAndCollectUrls(collectedPostUrls);

    // Phase 4: keep scrolling while the page is still producing new content.
    await verifyAndContinueScrolling(collectedPostUrls);

    // Phase 5: download everything that was gathered as a CSV file.
    saveUrlsAsCsv(collectedPostUrls);

    console.log("Instagram post collection complete.");
})();

// Function to scroll to the bottom and ensure all content is loaded
/**
 * Scrolls down one viewport at a time until the page height has stayed the
 * same for `maxRetries` consecutive checks, which is taken to mean that no
 * further content is being appended.
 *
 * @returns {Promise<void>} Resolves once the height has stopped growing.
 */
async function scrollToBottomAndLoadContent() {
    console.log("Loading all content by scrolling to the bottom...");

    const maxRetries = 10;
    let previousHeight = document.body.scrollHeight;

    // The counter is updated in the body: it grows while the height is
    // stable and drops back to zero whenever the page gets taller.
    for (let unchangedChecks = 0; unchangedChecks < maxRetries; ) {
        window.scrollBy(0, window.innerHeight);

        // Give the page a second to append content (tune as needed).
        await new Promise((resolve) => setTimeout(resolve, 1000));

        const currentHeight = document.body.scrollHeight;
        unchangedChecks = currentHeight === previousHeight ? unchangedChecks + 1 : 0;
        previousHeight = currentHeight;
    }

    console.log("All content loaded.");
}

// Function to scroll to the top of the page
/**
 * Jumps the window back to the very top and then pauses briefly so the page
 * can settle before anything else scrolls it.
 *
 * @returns {Promise<void>} Resolves after the settle delay has elapsed.
 */
async function scrollToTop() {
    const settleDelayMs = 2000;

    console.log("Scrolling to the top...");
    window.scrollTo(0, 0);

    await new Promise((resolve) => {
        setTimeout(resolve, settleDelayMs);
    });
}

// Function to scroll down and collect Instagram post URLs
/**
 * Scrolls down one viewport at a time, harvesting post URLs at every stop,
 * until the bottom of the page is reached.
 *
 * @param {Set<string>} urls - Set that receives the collected URLs (mutated in place).
 * @returns {Promise<void>} Resolves once the bottom of the page has been reached.
 */
async function scrollAndCollectUrls(urls) {
    console.log("Starting to scroll down, count posts, and collect URLs...");

    // scrollY may be fractional (zoom / high-DPI displays) while scrollHeight
    // is rounded, so an exact comparison can stay false forever. Treat
    // "within one pixel of the end" as having reached the bottom.
    const bottomTolerancePx = 1;
    let reachedBottom = false;

    while (!reachedBottom) {
        collectUrls(urls);

        window.scrollBy(0, window.innerHeight);
        // Short pause to allow the page to render new content.
        await new Promise((resolve) => setTimeout(resolve, 15));

        const remainingPx =
            document.body.scrollHeight - (window.innerHeight + window.scrollY);
        reachedBottom = remainingPx <= bottomTolerancePx;
    }

    // The last scroll may have revealed links that were not yet collected;
    // pick them up before reporting the total.
    collectUrls(urls);

    console.log(`Scrolling complete. Total unique Instagram posts found: ${urls.size}`);
}

// Function to collect Instagram post URLs
/**
 * Adds the URL of every Instagram post link currently in the document to `urls`.
 *
 * @param {Set<string>} urls - Set that receives the post URLs (mutated in place).
 * @returns {void}
 */
function collectUrls(urls) {
    const postUrlPrefix = "https://www.instagram.com/p/";

    for (const link of document.querySelectorAll('a')) {
        const { href } = link;

        // The 'a' selector also matches SVG <a> elements, whose `href` is an
        // object rather than a string; skip those instead of throwing.
        if (typeof href === "string" && href.startsWith(postUrlPrefix)) {
            urls.add(href);
        }
    }
}

// Function to verify content loading and continue scrolling if more content is being loaded
/**
 * Keeps scrolling and collecting until three consecutive checks find neither
 * new URLs nor a taller page.
 *
 * @param {Set<string>} urls - Set that receives the collected URLs (mutated in place).
 * @returns {Promise<void>} Resolves after three consecutive quiet checks.
 */
async function verifyAndContinueScrolling(urls) {
    console.log("Verifying content loading status...");

    const requiredQuietChecks = 3;
    const settleDelayMs = 1500;

    let lastHeight = document.body.scrollHeight;
    let quietChecks = 0;

    // Pick up whatever is already on screen before the first scroll.
    collectUrls(urls);

    // A while loop with an explicit counter is used so that a reset really
    // demands three fresh quiet checks (resetting a for-loop index to 0 is
    // immediately followed by its increment, which would leave only two).
    while (quietChecks < requiredQuietChecks) {
        const previousSize = urls.size;

        window.scrollBy(0, window.innerHeight);
        // Wait between verification checks so new content has time to appear.
        await new Promise((resolve) => setTimeout(resolve, settleDelayMs));

        // Collect after the wait so links revealed by this scroll are counted
        // in this check rather than being missed on the final round.
        collectUrls(urls);

        const newHeight = document.body.scrollHeight;

        if (previousSize !== urls.size || newHeight > lastHeight) {
            console.log(`Verification ${quietChecks + 1}: New content loaded. Continuing to scroll...`);
            lastHeight = newHeight;
            quietChecks = 0;
        } else {
            console.log(`Verification ${quietChecks + 1}: No new content loaded.`);
            quietChecks++;
        }
    }

    console.log(`Final verification complete. Total unique Instagram posts found: ${urls.size}`);
}

// Function to save collected URLs as a CSV file
/**
 * Downloads the collected URLs as a CSV file (one URL per line) named after
 * the current page title.
 *
 * @param {Iterable<string>} urls - The URLs to write, in iteration order.
 * @returns {void}
 */
function saveUrlsAsCsv(urls) {
    const csvText = [...urls].join("\n");

    // Sanitize the page title for use as a filename: every character that is
    // not a letter or digit becomes "_", and the result is lower-cased.
    const pageTitle = document.title.replace(/[^a-z0-9]/gi, '_').toLowerCase();
    const fileName = `${pageTitle}_instagram_post_urls.csv`;

    // Use a Blob rather than a data: URI. encodeURI leaves "#" unescaped, so
    // a URL containing a fragment would cut the data: URI short, and very
    // long lists can exceed data: URI size limits.
    const blob = new Blob([csvText], { type: "text/csv;charset=utf-8" });
    const objectUrl = URL.createObjectURL(blob);

    // Create a temporary link element and trigger the download.
    const link = document.createElement("a");
    link.href = objectUrl;
    link.download = fileName;
    document.body.appendChild(link); // Required for Firefox
    link.click();
    document.body.removeChild(link); // Clean up

    // Release the object URL a little later so the download has started
    // before the Blob reference is dropped.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);

    console.log(`CSV file "${fileName}" has been saved.`);
}

Key Changes:

  1. Dynamic Filename from Page Title:
    • The script now retrieves the title of the page using document.title and uses it as the base for the CSV filename.
    • The title is sanitized by replacing every character that is not alphanumeric with an underscore (_) and converting the result to lowercase, so the filename is valid across different operating systems.
  2. Filename Construction:
    • The filename is constructed by appending _instagram_post_urls.csv to the sanitized title of the page. For example, if the page title is "John's Instagram", the CSV file will be named john_s_instagram_instagram_post_urls.csv (the apostrophe and the space are each replaced by an underscore).

Usage Instructions:

  1. Open Developer Tools:
    • Open the Instagram page with posts.
    • Open Chrome Developer Tools (F12 or right-click and select "Inspect").
    • Go to the "Console" tab.
  2. Run the Script:
    • Copy and paste the updated script into the console.
    • Press Enter to execute the script.
  3. Download the CSV:
    • The script will automatically handle everything, from scrolling and collecting URLs to saving them in a CSV file with the page title as its name.

This approach provides a meaningful filename based on the page’s content, making it easier to identify and manage the downloaded CSV files.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment