Last active
October 13, 2022 22:08
-
-
Save agamm/094fe7896ffad04468e17b2466735531 to your computer and use it in GitHub Desktop.
Get only the visible text elements from the HTML of a page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// From the creator of unzip.dev
//
// Paste into the browser DevTools console to collect the visible text of the
// current page as a pretty-printed JSON array (logged, and copied to the
// clipboard when the console's `copy()` helper is available).

// Elements whose text is collected.
const TEXT_SELECTOR = "p, span, a, h1, h2, h3, h4, h5, h6, input, button, pre";

// <input> types that never render their `value` as readable text.
const NON_TEXT_INPUT_TYPES = new Set([
  "hidden",
  "password",
  "checkbox",
  "radio",
  "file",
  "range",
  "color",
  "image",
]);

/**
 * Normalises whitespace without gluing words together.
 *
 * @param {string} raw - Text as read from the DOM.
 * @returns {string} Text with runs of spaces/tabs collapsed to one space,
 *   blank lines removed, and leading/trailing whitespace trimmed.
 */
function cleanText(raw) {
  return raw
    .replace(/\r\n?/g, "\n")
    // Collapse every run of non-newline whitespace (spaces, tabs, NBSP) to one space.
    .replace(/[^\S\n]+/g, " ")
    // Collapse any run of newlines, including whitespace-only lines, to one newline.
    .replace(/ ?\n[ \n]*/g, "\n")
    .trim();
}

/**
 * Returns the text an element shows to the user.
 *
 * @param {Element} element - Element matched by TEXT_SELECTOR.
 * @returns {string} `value` (or `placeholder`) for textual inputs, otherwise
 *   the element's `textContent`.
 */
function elementText(element) {
  if (element.localName === "input") {
    // Inputs have no text nodes, so `textContent` is always empty for them.
    const type = String(element.type || "text").toLowerCase();
    if (NON_TEXT_INPUT_TYPES.has(type)) {
      return "";
    }
    return element.value || element.placeholder || "";
  }
  return element.textContent || "";
}

/**
 * Tells whether an element is rendered.
 *
 * @param {Element} element - Element to test.
 * @param {{getComputedStyle: Function}} view - Window owning the element.
 * @returns {boolean} `false` when the element or one of its ancestors is
 *   hidden via `display: none` / `visibility: hidden`.
 */
function isElementVisible(element, view) {
  const style = view.getComputedStyle(element);
  if (style.display === "none" || style.visibility === "hidden") {
    return false;
  }
  // `display: contents` elements generate no box of their own, so the rect
  // check below cannot be applied to them; treat them as visible.
  if (style.display === "contents") {
    return true;
  }
  // A child of a `display: none` ancestor still reports its own `display`
  // value, but it has no layout boxes at all.
  return element.getClientRects().length > 0;
}

/**
 * Collects the cleaned text of every visible text-bearing element.
 *
 * @param {ParentNode} root - Node to search (normally `document`).
 * @param {{getComputedStyle: Function}} view - Window used for style lookups.
 * @returns {string[]} Cleaned texts holding at least two non-newline characters.
 */
function collectVisibleTexts(root, view) {
  return Array.from(root.querySelectorAll(TEXT_SELECTOR))
    .filter((element) => isElementVisible(element, view))
    .map((element) => cleanText(elementText(element)))
    .filter((text) => text.replace(/\n/g, "").length > 1);
}

// Outside a browser there is no DOM to read; the snippet then does nothing,
// which keeps the helpers above reusable.
const hasDom = typeof document !== "undefined" && typeof window !== "undefined";
const cleanedTexts = hasDom ? collectVisibleTexts(document, window) : [];
const pretty = JSON.stringify(cleanedTexts, null, 2);
if (hasDom) {
  console.log(pretty);
  // `copy()` is a DevTools console utility; it is not defined for page scripts.
  if (typeof copy === "function") {
    copy(pretty);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment