Skip to content

Instantly share code, notes, and snippets.

@JTRNS
Last active June 10, 2020 13:45
Show Gist options
  • Save JTRNS/334f64ca7d55c79693d8803524df37a9 to your computer and use it in GitHub Desktop.
Save JTRNS/334f64ca7d55c79693d8803524df37a9 to your computer and use it in GitHub Desktop.
Locate the pagination element on a webpage
/**
 * Find the pagination element on the current page with reasonable accuracy.
 *
 * Heuristic: a candidate is any element below <body> (other than <script>)
 * whose innerHTML contains at least two whole numbers standing alone between
 * two tags (for example ">1<" or "> 12 <"). Candidates that have another
 * candidate as a direct child are dropped so only the innermost wrappers
 * remain; the last remaining candidate in the list is returned.
 *
 * @returns {Element|null} The presumed pagination element, or null when no
 *   element on the page satisfies the heuristic.
 */
function findPagination() {
  // A whole number alone between two tags. `\d+` rather than `\d` so that
  // page numbers of two or more digits are counted as well.
  const pageNumber = />\s*\d+\s*</g;

  // querySelectorAll only yields elements, so no nodeType check is needed.
  const candidates = Array.from(document.querySelectorAll('body *')).filter(
    (el) => {
      if (el.nodeName === 'SCRIPT') {
        return false;
      }
      // Run the regex once per element; match() is null when nothing matches.
      const hits = el.innerHTML.match(pageNumber);
      return hits !== null && hits.length > 1;
    }
  );

  // Set lookup keeps the "has a candidate child" test linear overall.
  const candidateSet = new Set(candidates);
  const innermost = candidates.filter(
    (el) => !Array.from(el.children).some((child) => candidateSet.has(child))
  );

  return innermost.length > 0 ? innermost[innermost.length - 1] : null;
}
/*
Some functions to help extract useful information
from the pagination element on a page
*/
/**
 * Collect the visible page numbers inside an element.
 *
 * Scans the element's innerHTML for runs of digits that stand alone between
 * two tags (surrounding whitespace allowed) and returns them in order of
 * appearance.
 *
 * @param {{innerHTML: string}} elem - Element whose markup is scanned.
 * @returns {string[]} The matched digit runs, as strings.
 */
function findIntsInElement(elem) {
  const numberBetweenTags = />[\t\n\r\s]*(\d+)[\t\n\r\s]*</g;
  const found = [];
  for (const match of elem.innerHTML.matchAll(numberBetweenTags)) {
    // The last entry of a match array is the single capture group.
    found.push(match[match.length - 1]);
  }
  return found;
}
/**
 * Collect decimal-looking numbers inside an element.
 *
 * Scans the element's innerHTML for numbers standing alone between two tags
 * (surrounding whitespace allowed). A number is a run of digits, optionally
 * split once by "." or ",", followed by another run of digits. The pattern
 * therefore needs at least two digits: "1.5", "2,75" and "42" match, while a
 * lone "7" does not.
 *
 * @param {{innerHTML: string}} elem - Element whose markup is scanned.
 * @returns {string[]} The matched numbers as strings, in order of appearance.
 */
function findFloatsInElement(elem) {
  const decimalBetweenTags = />[\t\n\r\s]*(\d+[\.,]?\d+)[\t\n\r\s]*</g;
  const markup = elem.innerHTML;
  // Keep only the capture group of each match.
  return Array.from(markup.matchAll(decimalBetweenTags), ([, captured]) => captured);
}
/**
 * Return the last direct child element whose innerText contains a digit.
 *
 * @param {{children: Iterable}} elem - Parent whose children are inspected.
 * @returns {Element|undefined} The matching child closest to the end, or
 *   undefined when no child qualifies.
 */
function findLastChildWithNumber(elem) {
  const kids = Array.from(elem.children);
  // Walk backwards so the first hit is the last qualifying child.
  for (let idx = kids.length - 1; idx >= 0; idx -= 1) {
    const kid = kids[idx];
    if (kid.nodeType === 1 && /\d+/.test(kid.innerText)) {
      return kid;
    }
  }
  return undefined;
}
/**
 * Build a finder that locates the first `tag` element following a node.
 *
 * The returned function climbs from the given node through its ancestors
 * until it reaches one that has a next element sibling. If that sibling's
 * nodeName equals `tag` it is returned; otherwise the first descendant of the
 * sibling matching `tag` (via querySelector) is returned.
 *
 * @param {string} tag - Tag name compared against nodeName and used as the
 *   querySelector selector (e.g. 'A').
 * @returns {function(Element): (Element|null)} Finder returning the matching
 *   element, or null when the sibling contains no match or when no ancestor
 *   has a following element sibling.
 */
function findFirstSiblingWithTag(tag) {
  return (elem) => {
    let node = elem;
    // Stop once the ancestors run out instead of dereferencing null.
    while (node && !node.nextElementSibling) {
      node = node.parentNode;
    }
    if (!node) {
      return null;
    }

    const sibling = node.nextElementSibling;
    return sibling.nodeName === tag ? sibling : sibling.querySelector(tag);
  };
}
// Helper for finding next-page buttons: looks up the first <a> element
// following `elem` by delegating to findFirstSiblingWithTag('A').
const findFirstLinkSibling = (elem) => {
  const findNextAnchor = findFirstSiblingWithTag('A');
  return findNextAnchor(elem);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment