Created
November 29, 2019 11:10
-
-
Save fgm/42affc85a3f0f22aab6e47bbfeca4f7f to your computer and use it in GitHub Desktop.
List of the 10 most repeated links on an HTML page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Count how often each remote (http/https) link target appears in a document
 * and report the targets that appear more than once.
 *
 * An anchor is considered remote when its raw `href` attribute starts with an
 * `http://` or `https://` scheme (case-insensitive). Occurrences are grouped
 * by the anchor's `href` property rather than by the raw attribute text.
 *
 * @param {Document} document - Document whose `body` is scanned for `<a>` elements.
 * @returns {Array<[string, number]>} `[url, count]` pairs for every URL linked
 *   more than once, ordered by descending count; ties are ordered by
 *   `String.prototype.localeCompare` on the URL.
 */
function duplicateLinks(document) {
  // Require the full "scheme://" prefix so relative paths that merely begin
  // with the letters "http" (e.g. "httpfoo.html") are not treated as remote.
  const remoteHref = /^https?:\/\//i;

  // A Map keeps arbitrary URL strings clear of Object.prototype keys.
  const counts = new Map();
  for (const anchor of Array.from(document.body.getElementsByTagName('a'))) {
    const rawHref = anchor.getAttribute('href');
    if (typeof rawHref !== 'string' || !remoteHref.test(rawHref)) {
      continue;
    }
    const url = anchor.href;
    counts.set(url, (counts.get(url) || 0) + 1);
  }

  return [...counts]
    .filter(([, count]) => count > 1)
    .sort(
      ([urlA, countA], [urlB, countB]) =>
        countB - countA || urlA.localeCompare(urlB),
    );
}
// Log at most the first ten entries returned by duplicateLinks for the
// current page, one line each: a tab, the count, a tab, then the URL.
for (const [url, count] of duplicateLinks(document).slice(0, 10)) {
  console.log(`\t${count}\t${url}`);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment