Skip to content

Instantly share code, notes, and snippets.

@ClearlyKyle
Last active January 6, 2023 11:00
Show Gist options
  • Save ClearlyKyle/81a09e4454e895c24f15003ab3d3114c to your computer and use it in GitHub Desktop.
Save ClearlyKyle/81a09e4454e895c24f15003ab3d3114c to your computer and use it in GitHub Desktop.
Get sentences from LanguageCrush and output them as a newline-separated string
/**
* Builds a list of sentences from the direct child nodes of an element
* (e.g. the ".reading-word-container" element).
*
* The children are walked in document order:
*   - text nodes are appended as-is (so punctuation sticks to the previous word);
*   - <span> elements are appended, preceded by a space when something has
*     already been appended to the current sentence;
*   - <br> elements end the current sentence (an empty sentence is still
*     recorded, so consecutive <br> elements produce empty strings);
*   - any other node is ignored.
*
* @param {HTMLElement|null|undefined} html - The element containing the sentences.
*        A nullish value (e.g. a failed querySelector lookup) yields an empty array.
* @return {string[]} An array of strings representing the sentences.
*/
function getSentences(html)
{
    // document.querySelector() returns null when nothing matches; treat that
    // as "no sentences" instead of failing on html.childNodes.
    if (html == null)
        return [];

    const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
    const sentences = [];
    let currentSentence = "";
    // True once a text node or <span> has been appended since the last <br>;
    // a following <span> then needs a separating space.
    let needsSpaceBeforeSpan = false;

    // Array.from accepts both a NodeList and any array-like of nodes
    for (const node of Array.from(html.childNodes))
    {
        if (node.nodeType === TEXT_NODE)
        {
            const text = node.textContent;
            // A text node that is exactly "-" gets a leading space. Per the
            // original author's note this does not affect hyphenated words
            // like "каким-то", only a dash joining two parts of a sentence.
            if (text === '-')
                currentSentence += " ";
            currentSentence += text;
            needsSpaceBeforeSpan = true;
        }
        else if (node.tagName === "BR")
        {
            // A line break closes the current sentence (even an empty one)
            sentences.push(currentSentence);
            currentSentence = "";
            needsSpaceBeforeSpan = false;
        }
        else if (node.tagName === "SPAN")
        {
            if (needsSpaceBeforeSpan)
                currentSentence += " ";
            currentSentence += node.textContent;
            needsSpaceBeforeSpan = true;
        }
    }

    // Flush the trailing sentence if the content did not end with a <br>
    if (currentSentence)
        sentences.push(currentSentence);
    return sentences;
}
// Look up the element with the class "reading-word-container".
// querySelector() returns null when the page has no matching element.
const html = document.querySelector(".reading-word-container");
// Extract the sentences only when the container exists; otherwise use an empty list
const sentences = html === null ? [] : getSentences(html);
if (html === null)
    // Report the missing container instead of failing with a TypeError
    console.warn('No element matching ".reading-word-container" was found on this page.');
else
    // Log the sentences to the console, with each sentence on a separate line
    console.log(sentences.join('\n'));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment