Created
January 9, 2024 21:19
-
-
Save jonniesweb/0c1d0d8ab04ec7b81f4975bfb4cf506e to your computer and use it in GitHub Desktop.
Parse the paragraphs of an HTML document while keeping track of the hierarchy of headings above each one
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Walks every heading and paragraph of the current document in document order
// and records, for each paragraph, the chain of headings it sits under.
// The result is logged to the console as `[headingTexts, paragraphText]` pairs.

// Headings (h1-h6) and paragraphs, in the order they appear in the document.
const htmlElements = document.querySelectorAll('h1,h2,h3,h4,h5,h6,p');

// One entry per paragraph: the text of each enclosing heading (outermost
// first), followed by the paragraph's own text.
const paragraphData: Array<[string[], string]> = [];

// Headings that are currently "open" at this point of the walk, outermost
// first. Their levels are strictly increasing from bottom to top.
const stack: Element[] = [];

for (const element of Array.from(htmlElements)) {
  switch (element.nodeName) {
    case 'H1':
    case 'H2':
    case 'H3':
    case 'H4':
    case 'H5':
    case 'H6': {
      // The names 'H1'..'H6' differ only in their final digit, so plain string
      // comparison orders headings by level. A new heading closes every open
      // heading at the same or a deeper level before becoming the innermost
      // one itself. On an empty stack `top` is undefined and nothing is popped.
      let top = stack[stack.length - 1];
      while (top !== undefined && top.nodeName >= element.nodeName) {
        stack.pop();
        top = stack[stack.length - 1];
      }
      stack.push(element);
      break;
    }
    case 'P': {
      // Snapshot the heading texts now: the stack keeps changing as the walk
      // continues, so each paragraph needs its own copy.
      const headings = stack.map((heading) => heading.textContent ?? '');
      paragraphData.push([headings, element.textContent ?? '']);
      break;
    }
    default:
      // Only reachable if the selector above and this switch drift apart.
      console.warn('unknown element type: ', element);
      break;
  }
}

// Display the resulting paragraph / heading-hierarchy data.
console.log(paragraphData);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment