Last active
January 6, 2023 11:00
-
-
Save ClearlyKyle/81a09e4454e895c24f15003ab3d3114c to your computer and use it in GitHub Desktop.
Get sentences from LanguageCrush and output them as a newline-separated string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits the direct children of a container element into lines of text.
 *
 * Text nodes and <span> children are concatenated into the current line;
 * every <br> child ends the current line and starts a new one. Any other
 * child node is ignored. Only direct children are inspected.
 *
 * Spacing rules:
 *  - a <span> gets a single leading space when a text node or another <span>
 *    already precedes it on the same line;
 *  - a text node whose content is exactly "-" gets a leading space;
 *  - every other text node is appended verbatim, with no space added.
 *
 * A <br> always emits the line collected so far, even when it is empty, so
 * consecutive <br> elements yield empty strings. Text remaining after the
 * last <br> is emitted only when it is non-empty.
 *
 * @param {HTMLElement} html - The element whose child nodes are read
 *     (e.g. the `.reading-word-container` element).
 * @return {string[]} The collected lines, in document order.
 */
function getSentences(html)
{
    const TEXT_NODE = 3; // Numeric value of Node.TEXT_NODE

    const sentences = [];
    let currentSentence = "";
    // True once a text node or <span> has been appended to the current line.
    // Decides whether the next <span> needs a separating space.
    let lineHasContent = false;

    for (const node of html.childNodes)
    {
        if (node.nodeType === TEXT_NODE)
        {
            const text = node.textContent;
            // A text node that is exactly "-" joins two parts of a sentence, so
            // it is set off with a space. Hyphenated words such as "каким-то"
            // are presumably not split into a lone "-" node and stay intact.
            if (text === '-')
                currentSentence += " ";
            currentSentence += text;
            lineHasContent = true;
        }
        else if (node.tagName === "BR")
        {
            // A line break ends the current line, even if nothing was collected
            sentences.push(currentSentence);
            currentSentence = "";
            lineHasContent = false;
        }
        else if (node.tagName === "SPAN")
        {
            // Separate this <span> from whatever precedes it on the same line
            if (lineHasContent)
                currentSentence += " ";
            currentSentence += node.textContent;
            lineHasContent = true;
        }
    }

    // Flush trailing text that was not terminated by a <br>
    if (currentSentence)
        sentences.push(currentSentence);

    return sentences;
}
// Locate the container element that holds the text to extract
const html = document.querySelector(".reading-word-container");
// querySelector() returns null when nothing matches; fall back to an empty
// list instead of letting getSentences() throw on a null element
const sentences = html === null ? [] : getSentences(html);
if (html === null)
{
    console.warn('No element matching ".reading-word-container" was found.');
}
else
{
    // Log the sentences to the console, with each sentence on a separate line
    console.log(sentences.join('\n'));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment