Created
February 18, 2024 21:18
-
-
Save dcatanzaro/9ae3c00b3384cca4ba558a3e4e6f8136 to your computer and use it in GitHub Desktop.
Browser comments scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Browser-console snippet: every SCROLL_INTERVAL_MS it scrolls to the bottom of
// the page, waits LOAD_WAIT_MS, then records the text of every element matching
// COMMENT_SELECTOR. It stops and logs the collected texts as soon as the page
// height is unchanged between two consecutive checks.

// Spans inside elements marked data-testid="tweetText".
// NOTE(review): the css-*/r-* class names look auto-generated and may change
// between site builds — confirm the selector still matches before running.
const COMMENT_SELECTOR =
  '[data-testid="tweetText"] span.css-1qaijid.r-bcqeeo.r-qvutc0.r-poiln3';
// Delay between two scroll attempts.
const SCROLL_INTERVAL_MS = 3000;
// Delay between scrolling and reading the DOM; must stay below
// SCROLL_INTERVAL_MS so each read finishes before the next scroll starts.
const LOAD_WAIT_MS = 2000;

// Unique comment texts in first-seen order (the final result).
const comments = [];
// Same texts as `comments`, kept in a Set so duplicate checks are O(1)
// instead of rescanning the whole array for every element on every tick.
const seenComments = new Set();
// Page height observed at the previous check; 0 until the first check runs.
let lastScrollHeight = 0;

/**
 * Appends the `innerText` of each element to `comments`, skipping any text
 * that has already been recorded.
 *
 * @param {Iterable<{innerText: string}>} elements - Elements to read, e.g. the
 *   NodeList returned by `document.querySelectorAll`.
 * @returns {void}
 */
function addNewComments(elements) {
  for (const element of elements) {
    const comment = element.innerText;
    if (!seenComments.has(comment)) {
      seenComments.add(comment);
      comments.push(comment);
    }
  }
}

const interval = setInterval(() => {
  // Scroll down to trigger loading of more comments.
  window.scrollTo(0, document.body.scrollHeight);

  // Give newly requested comments time to render before reading them.
  setTimeout(() => {
    addNewComments(document.querySelectorAll(COMMENT_SELECTOR));

    // An unchanged height since the previous check is treated as the end of
    // the page: stop scrolling and report the result.
    if (document.body.scrollHeight === lastScrollHeight) {
      clearInterval(interval);
      console.log("All comments have been collected:", comments);
    } else {
      lastScrollHeight = document.body.scrollHeight;
    }
  }, LOAD_WAIT_MS);
}, SCROLL_INTERVAL_MS);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment