- Get to the page with tweets you want to scrape.
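- Throw the content of `scrape.js` into the console.
- Scroll through the page; tweets are captured as they come into view.
- Dump the captured tweets as JSON by running: `console.log(JSON.stringify(Array.from(captured).map(d => d[1]), null, 2))`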
Last active
January 28, 2020 23:26
-
-
Save drzax/a0538cba144d3cd6e29ca69f6a48b641 to your computer and use it in GitHub Desktop.
Quick and dirty twitter scraper.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A bucket to put things in: a Map of captured tweets, keyed by tweet text
// so a tweet that is seen more than once is only stored once.
// `var` keeps the binding re-declarable if this snippet is pasted into the
// console more than once.
var captured = new Map();
/**
 * A function that captures tweets as you scroll.
 *
 * Every <article> element currently in the document is stored in the outer
 * `captured` Map under its full text content, so an article that is seen
 * again on a later call overwrites its earlier entry instead of adding a
 * duplicate.
 *
 * Stored value shape: { text, timestamp, imgSrc } where
 *   - text:      the article's textContent
 *   - timestamp: `datetime` attribute of the first <time> element, or null
 *   - imgSrc:    `src` attribute of the second <img> element, or null
 *
 * Declared as a function (rather than assigned to an undeclared name) so it
 * also works where implicit globals are rejected, e.g. strict mode.
 */
function capture() {
  document.querySelectorAll('article').forEach((article) => {
    const timestampEl = article.querySelector('time');
    const timestamp = timestampEl ? timestampEl.getAttribute('datetime') : null;
    // Index 1 skips the first image in the article; presumably that one is the
    // author's avatar — verify against the current page markup.
    const imgEl = article.querySelectorAll('img')[1];
    const imgSrc = imgEl ? imgEl.getAttribute('src') : null;
    const text = article.textContent;
    captured.set(text, { text, timestamp, imgSrc });
  });
}
// Add it: run `capture` on every scroll event so newly rendered tweets are
// picked up as the page is scrolled.
document.addEventListener('scroll', capture);
// Remove it (or just refresh the page) by running the following line:
// document.removeEventListener('scroll', capture);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment