Last active
February 28, 2018 13:27
-
-
Save fdebijl/f9fd1a8d61de7f06e47309adf1a7da57 to your computer and use it in GitHub Desktop.
ProPublica - cleanad.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// CSS selector list matching every element that may contain personal data
// or that is unimportant for some other reason, such as the like button.
// The entries are joined into a single comma-separated selector string so
// the whole list can be passed to one querySelectorAll() call.
const selectors = [
  "video",
  "input",
  "button",
  "iframe",
  'a[href=""]',
  ".accessible_elem",
  ".uiLikePagebutton",
  ".uiPopOver",
  ".uiCloseButton",
  ".uiChevronSelectorButton",
  "h5._1qbu",
  ".commentable_item"
].join(", ");
/**
 * Strips personal and traceable data from the HTML of an advertisement.
 *
 * The HTML is parsed into a detached temporary element, every element that
 * matches `selectors` is removed, and every remaining element loses all
 * attributes except `id`, `class`, `src` and `href`. Links are reduced to
 * origin + path (query string and fragment dropped); `l.facebook.com`
 * redirect links are first unwrapped to the target given in their `u`
 * query parameter.
 *
 * @param {string} html - Raw HTML of the advertisement.
 * @returns {string} The cleaned HTML.
 */
const cleanAd = html => {
  // Copy the ad's HTML into a temporary element so it can be modified
  // without any consequence for what the user sees.
  const node = document.createElement("div");
  node.innerHTML = html;

  // Remove every element that matches the selector list.
  Array.from(node.querySelectorAll(selectors)).forEach(el => el.remove());

  // The only attributes that survive cleaning.
  const keptAttrs = new Set(["id", "class", "src", "href"]);

  // Remove all attributes that may hold personal or traceable data.
  const killAttrs = el => {
    // Array.from takes a snapshot, so removing attributes while iterating is safe.
    Array.from(el.attributes).forEach(attr => {
      if (!keptAttrs.has(attr.name)) {
        el.removeAttribute(attr.name);
        return;
      }
      if (attr.name !== "href") {
        return;
      }
      // Links can carry tracking information too; strip it here.
      try {
        let url = new URL(attr.value);
        if (url.host === "l.facebook.com") {
          // Unwrap the redirect; a missing "u" parameter makes new URL()
          // throw, which drops the link in the catch below.
          url = new URL(url.searchParams.get("u"));
        }
        // Schemes without a real origin (mailto:, javascript:, data:, ...)
        // report the literal string "null" as origin; concatenating that
        // would yield a meaningless href, so such links are dropped.
        if (url.origin !== "null" && url.pathname) {
          el.setAttribute(attr.name, `${url.origin}${url.pathname}`);
        } else {
          el.removeAttribute(attr.name);
        }
      } catch (e) {
        // Deliberate best-effort: relative or otherwise unparseable links
        // cannot be cleaned, so they are removed instead.
        el.removeAttribute(attr.name);
      }
    });
  };

  // Apply the filtering to every element in the tree, not only to the
  // top-level children.
  Array.from(node.querySelectorAll("*")).forEach(killAttrs);

  // NOTE(review): as written this replacement is a no-op; presumably the
  // pattern was meant to match the "&amp;" entity -- confirm against the
  // original file before changing it.
  return node.innerHTML.replace(/&/g, "&");
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment