/**
 * Entry point: runs the whole collection pipeline as soon as the script is
 * evaluated (for example when pasted into the browser console).
 *
 * Phases, in order:
 *   1. scroll to the bottom so the page loads its content,
 *   2. scroll back to the top,
 *   3. walk down the page collecting unique post URLs,
 *   4. keep verifying/scrolling while more content appears,
 *   5. download the collected URLs as a CSV file.
 *
 * A failure in phases 1-4 is logged instead of surfacing as an unhandled
 * promise rejection, and any URLs gathered before the failure are still saved.
 */
(async function executeInstagramPostCollection() {
  console.log("Starting Instagram post collection...");

  const urls = new Set();
  let interrupted = false;

  try {
    // Step 1: Scroll to the bottom of the page to load all content
    await scrollToBottomAndLoadContent();
    // Step 2: Scroll back to the top
    await scrollToTop();
    // Step 3: Scroll down and collect unique URLs
    await scrollAndCollectUrls(urls);
    // Step 4: Verify content loading and scroll if more content is being loaded
    await verifyAndContinueScrolling(urls);
  } catch (error) {
    interrupted = true;
    console.error("Instagram post collection was interrupted:", error);
  }

  // Nothing worth exporting if the run failed before any URL was found.
  if (interrupted && urls.size === 0) {
    return;
  }

  // Step 5: Save the collected URLs as a CSV file
  saveUrlsAsCsv(urls);

  if (interrupted) {
    console.log(`Instagram post collection ended early; saved the ${urls.size} URLs collected so far.`);
  } else {
    console.log("Instagram post collection complete.");
  }
})().catch((error) => {
  // Last-resort handler so a failure while saving is reported, not left floating.
  console.error("Instagram post collection failed:", error);
});
/**
 * Scrolls to the bottom of the page repeatedly until the page height stops
 * growing, giving lazily loaded content a chance to be appended.
 *
 * Every pass jumps straight to the current bottom of the document. Scrolling
 * only one viewport per pass would count "height unchanged" passes while still
 * travelling through already-loaded content, and would give up on any page
 * taller than `maxRetries` viewports before ever reaching the bottom.
 *
 * @param {object} [options]
 * @param {number} [options.maxRetries=10] Consecutive passes with an unchanged
 *   page height after which the page is considered fully loaded.
 * @param {number} [options.delayMs=1000] Pause after each scroll, in
 *   milliseconds, to let new content arrive.
 * @returns {Promise<void>} Resolves once the height has been stable for
 *   `maxRetries` consecutive passes.
 */
async function scrollToBottomAndLoadContent({ maxRetries = 10, delayMs = 1000 } = {}) {
  console.log("Loading all content by scrolling to the bottom...");

  let lastHeight = document.body.scrollHeight;
  let retries = 0;

  while (retries < maxRetries) {
    // Jump to the bottom as it currently stands; it moves whenever content is added.
    window.scrollTo(0, document.body.scrollHeight);
    await new Promise((resolve) => setTimeout(resolve, delayMs));

    const newHeight = document.body.scrollHeight;
    // Any growth means content is still arriving, so start counting again.
    retries = newHeight === lastHeight ? retries + 1 : 0;
    lastHeight = newHeight;
  }

  console.log("All content loaded.");
}
/**
 * Returns the viewport to the very top of the page and then pauses for two
 * seconds so the page can settle before the caller continues.
 *
 * @returns {Promise<void>} Resolves after the settling pause.
 */
async function scrollToTop() {
  const settleDelayMs = 2000;

  console.log("Scrolling to the top...");
  window.scrollTo(0, 0);

  await new Promise((resolve) => {
    setTimeout(resolve, settleDelayMs);
  });
}
/**
 * Walks down the page one viewport at a time, collecting post URLs at every
 * stop, until the bottom of the page is reached.
 *
 * @param {Set<string>} urls Set that receives the collected URLs; it is
 *   mutated in place (via `collectUrls`).
 * @returns {Promise<void>} Resolves once the bottom has been reached and the
 *   final viewport has been collected.
 */
async function scrollAndCollectUrls(urls) {
  console.log("Starting to scroll down, count posts, and collect URLs...");

  // scrollY can be fractional while scrollHeight is a rounded integer, so an
  // exact comparison may never succeed; allow one pixel of slack.
  const bottomTolerancePx = 1;
  // Safety valve: if the window does not move at all for this many passes in a
  // row (e.g. the window itself is not scrollable), stop instead of spinning
  // forever.
  const maxStalledPasses = 50;

  let stalledPasses = 0;
  let scrolling = true;

  while (scrolling) {
    collectUrls(urls);

    const previousScrollY = window.scrollY;
    // Scroll down
    window.scrollBy(0, window.innerHeight);
    await new Promise((resolve) => setTimeout(resolve, 15)); // Short pause to allow page to render new content

    stalledPasses = window.scrollY === previousScrollY ? stalledPasses + 1 : 0;

    const reachedBottom =
      window.innerHeight + window.scrollY >= document.body.scrollHeight - bottomTolerancePx;
    if (reachedBottom || stalledPasses >= maxStalledPasses) {
      scrolling = false;
    }
  }

  // The loop collects *before* each scroll, so the viewport reached by the
  // last scroll has not been collected yet.
  collectUrls(urls);

  console.log(`Scrolling complete. Total unique Instagram posts found: ${urls.size}`);
}
/**
 * Adds the resolved URL of every anchor currently in the document whose
 * address starts with "https://www.instagram.com/p/" to `urls`.
 *
 * @param {Set<string>} urls Set that receives the matching URLs; mutated in
 *   place. Duplicates are ignored by the Set itself.
 * @returns {void}
 */
function collectUrls(urls) {
  const postUrlPrefix = "https://www.instagram.com/p/";

  for (const link of document.querySelectorAll("a")) {
    const { href } = link;
    // The "a" selector also matches SVG <a> elements, whose `href` is an
    // SVGAnimatedString object rather than a string; calling startsWith on it
    // would throw and abort the whole collection.
    if (typeof href === "string" && href.startsWith(postUrlPrefix)) {
      urls.add(href);
    }
  }
}
/**
 * Keeps scrolling and collecting until the page has been quiet for a number of
 * consecutive checks.
 *
 * A check is "quiet" when neither the number of collected URLs nor the page
 * height grew during it. Any growth resets the count, so the function only
 * returns after `requiredStableChecks` quiet checks in a row. (Resetting a
 * `for` counter to 0 inside the loop body would be followed by the loop's own
 * increment and silently grant one check fewer after every reset.)
 *
 * @param {Set<string>} urls Set that receives the collected URLs; mutated in
 *   place (via `collectUrls`).
 * @param {object} [options]
 * @param {number} [options.requiredStableChecks=3] Consecutive quiet checks
 *   needed before finishing.
 * @param {number} [options.delayMs=1500] Pause after each scroll, in
 *   milliseconds, before the page is inspected again.
 * @returns {Promise<void>} Resolves once the page has been quiet long enough.
 */
async function verifyAndContinueScrolling(urls, { requiredStableChecks = 3, delayMs = 1500 } = {}) {
  console.log("Verifying content loading status...");

  let lastHeight = document.body.scrollHeight;
  let stableChecks = 0;

  while (stableChecks < requiredStableChecks) {
    const previousSize = urls.size;

    // Collect what is in the document now, before scrolling away from it.
    collectUrls(urls);

    // Scroll down to load more content if needed
    window.scrollBy(0, window.innerHeight);
    await new Promise((resolve) => setTimeout(resolve, delayMs));

    // Collect again so links that appeared because of this scroll are counted
    // in this check instead of being missed when it is the last one.
    collectUrls(urls);

    const newHeight = document.body.scrollHeight;
    if (urls.size !== previousSize || newHeight > lastHeight) {
      console.log(`Verification ${stableChecks + 1}: New content loaded. Continuing to scroll...`);
      lastHeight = newHeight;
      stableChecks = 0; // Start the quiet-check count over
    } else {
      stableChecks += 1;
      console.log(`Verification ${stableChecks}: No new content loaded.`);
    }
  }

  console.log(`Final verification complete. Total unique Instagram posts found: ${urls.size}`);
}
/**
 * Downloads the collected URLs as a CSV file (one URL per line) by clicking a
 * temporary `<a download>` element that points at a data URI.
 *
 * The file name is the page title with every non-alphanumeric character
 * replaced by "_", lowercased, followed by "_instagram_post_urls.csv".
 *
 * @param {Iterable<string>} urls URLs to export, in iteration order.
 * @returns {void}
 */
function saveUrlsAsCsv(urls) {
  const csvBody = [...urls].join("\n");

  // encodeURIComponent, unlike encodeURI, also escapes "#", "?" and "&".
  // A raw "#" inside a data URI starts a fragment and would cut the file off
  // at the first URL that contains one.
  const dataUri = `data:text/csv;charset=utf-8,${encodeURIComponent(csvBody)}`;

  // Get the title of the page to use as the filename
  const pageTitle = document.title.replace(/[^a-z0-9]/gi, "_").toLowerCase(); // Sanitize title for filename
  const fileName = `${pageTitle}_instagram_post_urls.csv`;

  // Create a link element and trigger the download
  const link = document.createElement("a");
  link.setAttribute("href", dataUri);
  link.setAttribute("download", fileName);
  document.body.appendChild(link); // Required for Firefox
  try {
    link.click();
  } finally {
    document.body.removeChild(link); // Clean up even if the click throws
  }

  console.log(`CSV file "${fileName}" has been saved.`);
}
- Dynamic Filename from Page Title:
  - The script now retrieves the title of the page using `document.title` and uses it as the base for the CSV filename.
  - The title is sanitized by replacing any characters that are not alphanumeric with underscores (`_`) and converting it to lowercase, to ensure the filename is valid across different operating systems.
- Filename Construction:
  - The filename is constructed by appending `_instagram_post_urls.csv` to the sanitized title of the page. For example, if the page title is "John's Instagram", the CSV file will be named `john_s_instagram_instagram_post_urls.csv` (the apostrophe and the space are each replaced by an underscore).
- Open Developer Tools:
  - Open the Instagram page with posts.
  - Open Chrome Developer Tools (`F12`, or right-click and select "Inspect").
  - Go to the "Console" tab.
- Run the Script:
  - Copy and paste the updated script into the console.
  - Press `Enter` to execute the script.
- Download the CSV:
- The script will automatically handle everything, from scrolling and collecting URLs to saving them in a CSV file with the page title as its name.
This approach provides a meaningful filename based on the page’s content, making it easier to identify and manage the downloaded CSV files.