udaylunawat · June 4, 2025 08:38
diff --git a/instagram_posts_saver.MD b/instagram_posts_saver.MD
diff --git a/save_posts_to_csv.jsx b/save_posts_to_csv.jsx
 /**
  * Entry point, run immediately: checks that the page is Instagram, waits for
  * it to settle, collects up to `maxPosts` post/reel URLs while scrolling and
  * downloads them as a CSV file.
  *
  * @param {number} [maxPosts=10] - Maximum number of posts to collect. The
  *   function is invoked without arguments, so the default always applies.
  */
 (async function executeInstagramPostCollection(maxPosts = 10) {
    console.log(`Starting Instagram post collection (limit: ${maxPosts} posts)...`);

    // Verify we're on Instagram. Compare against the exact host or a genuine
    // subdomain; a substring test would also accept hosts such as
    // "notinstagram.com" or "instagram.com.example.org".
    const { hostname } = window.location;
    if (hostname !== 'instagram.com' && !hostname.endsWith('.instagram.com')) {
        console.error("This script should be run on Instagram.com");
        return;
    }

    // Give the page a fixed 3 seconds to finish rendering before scanning it
    console.log("Waiting for page to fully load...");
    await new Promise(r => setTimeout(r, 3000));

    // Scroll down and collect unique URLs with poster names until we have enough
    const urlsWithPosters = new Map();
    await scrollAndCollectUrls(urlsWithPosters, maxPosts);

    // Nothing collected: print selector counts to help diagnose why
    if (urlsWithPosters.size === 0) {
        console.error("Failed to collect any posts. Here's some debug info:");
        console.log(`Total <a> tags on page: ${document.querySelectorAll('a').length}`);
        console.log(`Instagram post links found: ${document.querySelectorAll('a[href*="/p/"]').length}`);
        console.log(`Instagram reel links found: ${document.querySelectorAll('a[href*="/reel/"]').length}`);
        console.log("Try running the script again or checking the page structure.");
        return;
    }

    // Log what we collected for debugging
    console.log("Collected posts:");
    urlsWithPosters.forEach((posterName, url) => {
        console.log(`- ${url} by ${posterName}`);
    });

    // Save the collected URLs as a CSV file
    saveUrlsAsCsv(urlsWithPosters);

    console.log("Instagram post collection complete.");
 })().catch((error) => {
    // Without this handler a failure anywhere above is an unhandled rejection
    console.error("Instagram post collection failed:", error);
 });

 // Function to scroll down and collect Instagram post URLs
 /**
  * Scrolls the page one viewport at a time, harvesting post links after each
  * step, until `maxPosts` entries are stored, the bottom of the page is
  * reached, or five consecutive passes yield nothing new.
  *
  * @param {Map<string, string>} urlsWithPosters - Filled in place by collectUrls.
  * @param {number} maxPosts - Upper bound on the number of entries to collect.
  * @returns {Promise<void>} Resolves once scrolling has stopped.
  */
 async function scrollAndCollectUrls(urlsWithPosters, maxPosts) {
    console.log("Starting to scroll down and collect posts...");

    const maxNoNewPosts = 5; // Stop if no new posts are found after 5 scrolls
    let noNewPostsCounter = 0;
    let reachedEnd = false;

    while (!reachedEnd) {
        // Try to collect posts
        const beforeCount = urlsWithPosters.size;
        collectUrls(urlsWithPosters, maxPosts);
        const afterCount = urlsWithPosters.size;

        // Log progress
        if (afterCount > beforeCount) {
            console.log(`Collected ${afterCount}/${maxPosts} posts...`);
            noNewPostsCounter = 0;
        } else {
            noNewPostsCounter++;
            console.log(`No new posts found (attempt ${noNewPostsCounter}/${maxNoNewPosts})`);
        }

        // Stop if we've collected enough posts
        if (urlsWithPosters.size >= maxPosts) {
            console.log(`Reached target of ${maxPosts} posts. Stopping collection.`);
            break;
        }

        // Stop if we haven't found new posts after several attempts
        if (noNewPostsCounter >= maxNoNewPosts) {
            console.log(`No new posts found after ${maxNoNewPosts} scroll attempts. Stopping collection.`);
            break;
        }

        // Scroll down one viewport and give the page time to render new content
        window.scrollBy(0, window.innerHeight);
        await new Promise(r => setTimeout(r, 1500));

        // scrollY may be fractional, so round up before comparing with the
        // integer scrollHeight; otherwise the bottom can go undetected.
        if (document.body.scrollHeight <= Math.ceil(window.innerHeight + window.scrollY)) {
            console.log("Reached the end of the page.");
            reachedEnd = true;
            // The loop ends here, so harvest what the final scroll revealed
            collectUrls(urlsWithPosters, maxPosts);
        }
    }

    console.log(`Scrolling complete. Total Instagram posts collected: ${urlsWithPosters.size}`);
 }

 // Function to collect Instagram post URLs and poster names
 /**
  * Scans the current DOM for post and reel anchors and records each unseen URL
  * together with the best poster name that can be determined for it.
  *
  * @param {Map<string, string>} urlsWithPosters - Updated in place (URL -> poster name).
  * @param {number} maxPosts - No entries are added once the map holds this many.
  */
 function collectUrls(urlsWithPosters, maxPosts) {
    const isFull = () => urlsWithPosters.size >= maxPosts;
    if (isFull()) return;

    const anchors = [...document.querySelectorAll('a[href*="/p/"], a[href*="/reel/"]')];

    for (const anchor of anchors) {
        if (isFull()) break;

        const postUrl = anchor.href;
        const isUsable = Boolean(postUrl) && !urlsWithPosters.has(postUrl);
        if (!isUsable) continue;

        // Prefer a name embedded in the URL; otherwise look at nearby profile links
        let poster = extractUsernameFromHref(postUrl);
        if (!poster) {
            const container = findParentContainer(anchor);
            if (container) poster = findUsernameInContainer(container);
        }

        // Placeholder when neither lookup produced a name
        const resolvedPoster = poster || "unknown_user";
        urlsWithPosters.set(postUrl, resolvedPoster);
        console.log(`Added post: ${postUrl} by ${resolvedPoster}`);
    }
 }

 // Function to extract username from the Instagram post/reel URL
 /**
  * Reads the poster name out of a post/reel URL of the form
  * `/USERNAME/p/POST_ID/` or `/USERNAME/reel/REEL_ID/`.
  *
  * @param {string} href - Absolute URL to inspect.
  * @returns {string|null} The first path segment when the second one is `p` or
  *   `reel` and at least three segments exist; otherwise null. Unparseable
  *   input is logged and also yields null.
  */
 function extractUsernameFromHref(href) {
    let segments;
    try {
        // Drop the empty strings produced by leading/trailing slashes
        segments = new URL(href).pathname.split('/').filter(Boolean);
    } catch (e) {
        console.error("Error extracting username from URL:", e);
        return null;
    }

    const [owner, kind] = segments;
    const isPostOrReel = kind === 'p' || kind === 'reel';
    return segments.length >= 3 && isPostOrReel ? owner : null;
 }

 // Helper function to find username in a container
 /**
  * Looks inside a container element for a link that appears to point at a
  * user profile and returns the first path segment of its URL.
  *
  * @param {Element|null} container - Element whose descendant links are searched.
  * @returns {string|null} First matching path segment, or null when the
  *   container is missing or no suitable link is found.
  */
 function findUsernameInContainer(container) {
    if (!container) return null;

    // First path segments that belong to Instagram features, not to users
    const featureSegments = new Set(['explore', 'reels', 'stories', 'direct', 'accounts']);

    // Post and reel links are excluded by the selector itself
    const candidates = container.querySelectorAll('a:not([href*="/p/"]):not([href*="/reel/"])');
    for (const candidate of candidates) {
        const target = candidate.href;
        if (!target || !target.includes('instagram.com/')) continue;

        try {
            const [firstSegment] = new URL(target).pathname.split('/').filter(Boolean);
            if (firstSegment !== undefined && !featureSegments.has(firstSegment)) {
                return firstSegment;
            }
        } catch (e) {
            console.error("Error parsing profile URL:", e);
        }
    }

    return null;
 }

 // Helper function to find a reasonable parent container
 /**
  * Walks up at most five ancestors of `element` and returns the first one that
  * looks like a post container: an ARTICLE, or a DIV that has a `role`
  * attribute, more than three child elements, or at least one class.
  *
  * @param {Element|null} element - Starting element (not itself a candidate).
  * @returns {Element|null} The matching ancestor, or null when none qualifies.
  */
 function findParentContainer(element) {
    if (!element) return null;

    let ancestor = element.parentElement;
    let levelsLeft = 5; // Look up to 5 levels up

    while (ancestor && levelsLeft > 0) {
        if (ancestor.tagName === 'ARTICLE') return ancestor;

        if (ancestor.tagName === 'DIV') {
            const looksLikeContainer =
                ancestor.hasAttribute('role') ||
                ancestor.childElementCount > 3 ||
                ancestor.classList.length > 0;
            if (looksLikeContainer) return ancestor;
        }

        ancestor = ancestor.parentElement;
        levelsLeft -= 1;
    }

    return null;
 }

 // Function to save collected URLs and poster names as a CSV file
 /**
  * Serializes the collected posts to CSV (header `URL,Poster`, one row per
  * post) and triggers a browser download named after the page title.
  *
  * @param {Map<string, string>} urlsWithPosters - Post URL -> poster name.
  *   Nothing is downloaded when the map is empty.
  */
 function saveUrlsAsCsv(urlsWithPosters) {
    if (urlsWithPosters.size === 0) {
        console.error("No posts were collected. Unable to create CSV.");
        return;
    }

    // RFC 4180 quoting: a field containing a comma, a double quote or a line
    // break must be wrapped in quotes, with embedded quotes doubled. Doubling
    // quotes without wrapping the field would produce an invalid row.
    const toCsvField = (value) => {
        const text = String(value);
        return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };

    const csvRows = ["URL,Poster"];
    urlsWithPosters.forEach((posterName, url) => {
        csvRows.push(`${toCsvField(url)},${toCsvField(posterName)}`);
    });
    const csvContent = csvRows.join('\n');

    // Create a Blob with the CSV data
    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const objectUrl = URL.createObjectURL(blob);

    // Use the page title, reduced to [a-z0-9_], as the file name prefix
    const pageTitle = document.title.replace(/[^a-z0-9]/gi, '_').toLowerCase();
    const fileName = `${pageTitle}_instagram_post_urls.csv`;

    // Create a link element and trigger the download
    const link = document.createElement("a");
    link.setAttribute("href", objectUrl);
    link.setAttribute("download", fileName);
    document.body.appendChild(link); // Required for Firefox
    link.click();

    // Clean up shortly afterwards so the download has been started first
    setTimeout(() => {
        document.body.removeChild(link);
        window.URL.revokeObjectURL(objectUrl);
    }, 100);

    console.log(`CSV file "${fileName}" has been saved with ${urlsWithPosters.size} posts.`);
 }
	/**
	 * Entry point, run immediately: checks that the page is Instagram, waits for
	 * it to settle, collects up to `maxPosts` post/reel URLs while scrolling and
	 * downloads them as a CSV file.
	 *
	 * @param {number} [maxPosts=10] - Maximum number of posts to collect. The
	 *   function is invoked without arguments, so the default always applies.
	 */
	(async function executeInstagramPostCollection(maxPosts = 10) {
		console.log(`Starting Instagram post collection (limit: ${maxPosts} posts)...`);

		// Verify we're on Instagram. Compare against the exact host or a genuine
		// subdomain; a substring test would also accept hosts such as
		// "notinstagram.com" or "instagram.com.example.org".
		const { hostname } = window.location;
		if (hostname !== 'instagram.com' && !hostname.endsWith('.instagram.com')) {
			console.error("This script should be run on Instagram.com");
			return;
		}

		// Give the page a fixed 3 seconds to finish rendering before scanning it
		console.log("Waiting for page to fully load...");
		await new Promise(r => setTimeout(r, 3000));

		// Scroll down and collect unique URLs with poster names until we have enough
		const urlsWithPosters = new Map();
		await scrollAndCollectUrls(urlsWithPosters, maxPosts);

		// Nothing collected: print selector counts to help diagnose why
		if (urlsWithPosters.size === 0) {
			console.error("Failed to collect any posts. Here's some debug info:");
			console.log(`Total <a> tags on page: ${document.querySelectorAll('a').length}`);
			console.log(`Instagram post links found: ${document.querySelectorAll('a[href*="/p/"]').length}`);
			console.log(`Instagram reel links found: ${document.querySelectorAll('a[href*="/reel/"]').length}`);
			console.log("Try running the script again or checking the page structure.");
			return;
		}

		// Log what we collected for debugging
		console.log("Collected posts:");
		urlsWithPosters.forEach((posterName, url) => {
			console.log(`- ${url} by ${posterName}`);
		});

		// Save the collected URLs as a CSV file
		saveUrlsAsCsv(urlsWithPosters);

		console.log("Instagram post collection complete.");
	})().catch((error) => {
		// Without this handler a failure anywhere above is an unhandled rejection
		console.error("Instagram post collection failed:", error);
	});

	// Function to scroll down and collect Instagram post URLs
	/**
	 * Scrolls the page one viewport at a time, harvesting post links after each
	 * step, until `maxPosts` entries are stored, the bottom of the page is
	 * reached, or five consecutive passes yield nothing new.
	 *
	 * @param {Map<string, string>} urlsWithPosters - Filled in place by collectUrls.
	 * @param {number} maxPosts - Upper bound on the number of entries to collect.
	 * @returns {Promise<void>} Resolves once scrolling has stopped.
	 */
	async function scrollAndCollectUrls(urlsWithPosters, maxPosts) {
		console.log("Starting to scroll down and collect posts...");

		const maxNoNewPosts = 5; // Stop if no new posts are found after 5 scrolls
		let noNewPostsCounter = 0;
		let reachedEnd = false;

		while (!reachedEnd) {
			// Try to collect posts
			const beforeCount = urlsWithPosters.size;
			collectUrls(urlsWithPosters, maxPosts);
			const afterCount = urlsWithPosters.size;

			// Log progress
			if (afterCount > beforeCount) {
				console.log(`Collected ${afterCount}/${maxPosts} posts...`);
				noNewPostsCounter = 0;
			} else {
				noNewPostsCounter++;
				console.log(`No new posts found (attempt ${noNewPostsCounter}/${maxNoNewPosts})`);
			}

			// Stop if we've collected enough posts
			if (urlsWithPosters.size >= maxPosts) {
				console.log(`Reached target of ${maxPosts} posts. Stopping collection.`);
				break;
			}

			// Stop if we haven't found new posts after several attempts
			if (noNewPostsCounter >= maxNoNewPosts) {
				console.log(`No new posts found after ${maxNoNewPosts} scroll attempts. Stopping collection.`);
				break;
			}

			// Scroll down one viewport and give the page time to render new content
			window.scrollBy(0, window.innerHeight);
			await new Promise(r => setTimeout(r, 1500));

			// scrollY may be fractional, so round up before comparing with the
			// integer scrollHeight; otherwise the bottom can go undetected.
			if (document.body.scrollHeight <= Math.ceil(window.innerHeight + window.scrollY)) {
				console.log("Reached the end of the page.");
				reachedEnd = true;
				// The loop ends here, so harvest what the final scroll revealed
				collectUrls(urlsWithPosters, maxPosts);
			}
		}

		console.log(`Scrolling complete. Total Instagram posts collected: ${urlsWithPosters.size}`);
	}

	// Function to collect Instagram post URLs and poster names
	/**
	 * Scans the current DOM for post and reel anchors and records each unseen URL
	 * together with the best poster name that can be determined for it.
	 *
	 * @param {Map<string, string>} urlsWithPosters - Updated in place (URL -> poster name).
	 * @param {number} maxPosts - No entries are added once the map holds this many.
	 */
	function collectUrls(urlsWithPosters, maxPosts) {
		if (urlsWithPosters.size >= maxPosts) return;

		// Substring match (*=) is required: an exact match on "/p/" or "/reel/"
		// never matches a real post link, whose href also carries the post id.
		const postLinks = Array.from(document.querySelectorAll('a[href*="/p/"], a[href*="/reel/"]'));

		for (const link of postLinks) {
			if (urlsWithPosters.size >= maxPosts) break;

			const postUrl = link.href;
			// Skip empty hrefs and URLs we already have
			if (!postUrl || urlsWithPosters.has(postUrl)) continue;

			// Prefer a name embedded in the URL itself
			let username = extractUsernameFromHref(postUrl);

			// Otherwise look for a profile link in a nearby ancestor container
			if (!username) {
				const parent = findParentContainer(link);
				if (parent) {
					username = findUsernameInContainer(parent);
				}
			}

			// Fallback if all methods fail
			if (!username) {
				username = "unknown_user";
			}

			// Add to our collection
			urlsWithPosters.set(postUrl, username);
			console.log(`Added post: ${postUrl} by ${username}`);
		}
	}

	// Function to extract username from the Instagram post/reel URL
	/**
	 * Reads the poster name out of a post/reel URL of the form
	 * `/USERNAME/p/POST_ID/` or `/USERNAME/reel/REEL_ID/`.
	 *
	 * @param {string} href - Absolute URL to inspect.
	 * @returns {string|null} The first path segment when the second one is `p` or
	 *   `reel` and at least three segments exist; otherwise null. Unparseable
	 *   input is logged and also yields null.
	 */
	function extractUsernameFromHref(href) {
		try {
			const url = new URL(href);
			// Drop the empty strings produced by leading/trailing slashes
			const pathParts = url.pathname.split('/').filter(Boolean);

			// Check if this is a post or reel URL with the expected structure
			if (pathParts.length >= 3 && (pathParts[1] === 'p' || pathParts[1] === 'reel')) {
				return pathParts[0]; // The first path segment is the username
			}

			return null;
		} catch (e) {
			console.error("Error extracting username from URL:", e);
			return null;
		}
	}

	// Helper function to find username in a container
	/**
	 * Looks inside a container element for a link that appears to point at a
	 * user profile and returns the first path segment of its URL.
	 *
	 * @param {Element|null} container - Element whose descendant links are searched.
	 * @returns {string|null} First matching path segment, or null when the
	 *   container is missing or no suitable link is found.
	 */
	function findUsernameInContainer(container) {
		if (!container) return null;

		// Exclude post/reel links with a substring match (*=); an exact match on
		// "/p/" would let real post links through and report "p" as a username.
		const profileLinks = container.querySelectorAll('a:not([href*="/p/"]):not([href*="/reel/"])');
		for (const link of profileLinks) {
			if (!link.href || !link.href.includes('instagram.com/')) continue;

			try {
				const url = new URL(link.href);
				const pathParts = url.pathname.split('/').filter(Boolean);

				// Skip links to Instagram features
				if (pathParts.length > 0 &&
					!['explore', 'reels', 'stories', 'direct', 'accounts'].includes(pathParts[0])) {
					return pathParts[0]; // Return the username
				}
			} catch (e) {
				console.error("Error parsing profile URL:", e);
			}
		}

		return null;
	}

	// Helper function to find a reasonable parent container
	/**
	 * Walks up at most five ancestors of `element` and returns the first one that
	 * looks like a post container: an ARTICLE, or a DIV that has a `role`
	 * attribute, more than three child elements, or at least one class.
	 *
	 * @param {Element|null} element - Starting element (not itself a candidate).
	 * @returns {Element|null} The matching ancestor, or null when none qualifies.
	 */
	function findParentContainer(element) {
		if (!element) return null;

		let current = element;
		for (let i = 0; i < 5; i++) { // Look up to 5 levels up
			if (!current.parentElement) break;
			current = current.parentElement;

			// If we found an article or a div that seems like a post container, return it
			if (current.tagName === 'ARTICLE' ||
				(current.tagName === 'DIV' && (
					current.hasAttribute('role') ||
					current.childElementCount > 3 ||
					current.classList.length > 0)
				)) {
				return current;
			}
		}
		return null;
	}

	// Function to save collected URLs and poster names as a CSV file
	/**
	 * Serializes the collected posts to CSV (header `URL,Poster`, one row per
	 * post) and triggers a browser download named after the page title.
	 *
	 * @param {Map<string, string>} urlsWithPosters - Post URL -> poster name.
	 *   Nothing is downloaded when the map is empty.
	 */
	function saveUrlsAsCsv(urlsWithPosters) {
		if (urlsWithPosters.size === 0) {
			console.error("No posts were collected. Unable to create CSV.");
			return;
		}

		// RFC 4180 quoting: a field containing a comma, a double quote or a line
		// break must be wrapped in quotes, with embedded quotes doubled. Doubling
		// quotes without wrapping the field would produce an invalid row.
		const toCsvField = (value) => {
			const text = String(value);
			return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
		};

		const csvRows = ["URL,Poster"];
		urlsWithPosters.forEach((posterName, url) => {
			csvRows.push(`${toCsvField(url)},${toCsvField(posterName)}`);
		});
		const csvContent = csvRows.join('\n');

		// Create a Blob with the CSV data
		const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
		const objectUrl = URL.createObjectURL(blob);

		// Use the page title, reduced to [a-z0-9_], as the file name prefix
		const pageTitle = document.title.replace(/[^a-z0-9]/gi, '_').toLowerCase();
		const fileName = `${pageTitle}_instagram_post_urls.csv`;

		// Create a link element and trigger the download
		const link = document.createElement("a");
		link.setAttribute("href", objectUrl);
		link.setAttribute("download", fileName);
		document.body.appendChild(link); // Required for Firefox
		link.click();

		// Clean up shortly afterwards so the download has been started first
		setTimeout(() => {
			document.body.removeChild(link);
			window.URL.revokeObjectURL(objectUrl);
		}, 100);

		console.log(`CSV file "${fileName}" has been saved with ${urlsWithPosters.size} posts.`);
	}