Skip to content

Instantly share code, notes, and snippets.

@bizfreak22
Forked from pnhuyduy/insta-crawler.js
Created April 19, 2020 23:10
Show Gist options
  • Save bizfreak22/1da20166637f86597d6bbb2109a28224 to your computer and use it in GitHub Desktop.
// Source: https://intoli.com/blog/scrape-infinite-scroll/
const fs = require('fs');
const puppeteer = require('puppeteer');
/**
 * Runs inside the browser context (passed to page.evaluate): collects the
 * `src` URL of every image rendered in the profile's post grid.
 *
 * NOTE(review): the selector is tied to Instagram's DOM layout at the time
 * of writing — verify it still matches before relying on it.
 *
 * @returns {string[]} image source URLs currently present on the page.
 */
function extractItems() {
const images = document.querySelectorAll('main > div > div:nth-child(4) > article > div > div > div img');
// Map the NodeList straight to its src strings; the stray debug
// console.log from the original has been removed.
return Array.from(images, (img) => img.src);
}
/**
 * Scrolls `page` until at least `itemTargetCount` items have been
 * extracted, or until the page stops growing (no new content loads).
 *
 * @param {object} page - Puppeteer Page instance.
 * @param {Function} extractItems - evaluated in the page context; must
 *   return an array of the items currently visible.
 * @param {number} itemTargetCount - stop once this many items are collected.
 * @param {number} [scrollDelay=1000] - milliseconds to pause after each scroll.
 * @returns {Promise<Array>} items collected so far (may be fewer than
 *   itemTargetCount if the page ran out of content).
 */
async function scrapeInfiniteScrollItems(
page,
extractItems,
itemTargetCount,
scrollDelay = 1000,
) {
let items = [];
try {
while (items.length < itemTargetCount) {
items = await page.evaluate(extractItems);
const previousHeight = await page.evaluate('document.body.scrollHeight');
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
// Throws a TimeoutError when the page stops growing — that is the
// expected exit path once there is no more content to load.
await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
// page.waitFor() was deprecated and removed in modern Puppeteer;
// a plain setTimeout-based delay works on every version.
await new Promise((resolve) => setTimeout(resolve, scrollDelay));
}
} catch (err) {
// Best-effort: return whatever was collected before the timeout, but
// surface unexpected failures instead of swallowing them silently.
if (err.name !== 'TimeoutError') {
console.error('scrapeInfiniteScrollItems stopped early:', err.message);
}
}
return items;
}
(async () => {
// Set up browser and page.
const browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
try {
const page = await browser.newPage();
// setViewport returns a promise — the original never awaited it.
await page.setViewport({ width: 1280, height: 926 });
// Navigate to the profile to scrape.
await page.goto('https://www.instagram.com/diq.ng/');
// Scroll and extract items from the page.
const items = await scrapeInfiniteScrollItems(page, extractItems, 100);
// Save extracted items to a file.
fs.writeFileSync('./items.txt', items.join('\n') + '\n');
} finally {
// Always close the browser, even if navigation or scraping throws,
// so a headful Chromium is not left running.
await browser.close();
}
})().catch((err) => {
// The IIFE's promise must not float — report failure and set a
// non-zero exit code instead of an unhandled rejection.
console.error(err);
process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment