Created
April 14, 2024 09:21
-
-
Save lirantal/a6093bbe4bb22dee62b9ee466a42a44d to your computer and use it in GitHub Desktop.
Fetch the contents of the RSS feed updates on the https://updates.snyk.io website into an array of objects.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Collects every element matching `.changelogItem.published` in the current
 * document and turns each one into a plain object.
 *
 * Must run where `document` and `Node` are available (e.g. a browser console).
 *
 * @returns {Array<{title: (string|null), datePublished: (string|null), contentBody: string}>}
 *   One entry per matched element, in document order:
 *   - `title`: text of the `h2.title a` link, or `null` when the link is missing.
 *   - `datePublished`: `datetime` attribute of `.articleMeta time`, or `null`
 *     when the element or the attribute is missing.
 *   - `contentBody`: text of `.content` with `<a>` elements kept as HTML, or
 *     `''` when there is no `.content` element.
 */
function extractNewsItems() {
  // Flattens a DOM subtree into a string: anchors are kept as markup, text
  // nodes contribute their raw text, and every other node is replaced by the
  // flattened form of its children.
  function processContent(contentElement) {
    let finalContent = '';
    for (const node of contentElement.childNodes) {
      if (node.nodeType === Node.ELEMENT_NODE && node.tagName === 'A') {
        // Anchor: keep the whole element (attributes and inner markup) as HTML.
        finalContent += node.outerHTML;
      } else if (node.nodeType === Node.TEXT_NODE) {
        // NOTE: nodeValue is the unescaped text, so the result mixes raw text
        // with anchor HTML; sanitize it before inserting it back into a page.
        finalContent += node.nodeValue;
      } else {
        // Any other node: drop the wrapper itself and descend into its children.
        finalContent += processContent(node);
      }
    }
    return finalContent;
  }

  const items = document.querySelectorAll('.changelogItem.published');

  return Array.from(items, (item) => {
    const titleLink = item.querySelector('h2.title a');
    const timeElement = item.querySelector('.articleMeta time');
    const contentElement = item.querySelector('.content');

    // querySelector returns null when nothing matches; guard each lookup so a
    // single incomplete entry does not abort the extraction of all the others.
    const title = titleLink ? titleLink.textContent : null;
    const datePublished = timeElement ? timeElement.getAttribute('datetime') : null;
    const contentBody = contentElement ? processContent(contentElement) : '';

    return { title, datePublished, contentBody };
  });
}
// Run the extraction against the current page and print the resulting array
// of news items to the console.
console.log(extractNewsItems());
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment