Last active
February 4, 2023 01:24
-
-
Save marcosrjjunior/7ea904c36f60324af14178a844ec9607 to your computer and use it in GitHub Desktop.
simple web scraping using js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Minimal in-page scraper: reads a property from DOM elements located by
 * CSS selector, accumulates the values, and serializes them as JSON.
 */
const scraper = {
  /** Collected entries; each successful setElement call appends one single-key object. */
  elements: [],

  /**
   * Finds the first element matching `el` and stores one of its properties
   * under `name`.
   *
   * @param {string} name - Key under which the extracted value is stored.
   * @param {string} el - CSS selector handed to `document.querySelector`.
   * @param {string} [option='textContent'] - Name of the property to read
   *   from the matched element.
   * @returns {number|undefined} The new length of `scraper.elements` when a
   *   value was stored; `undefined` when the element or property is missing
   *   (a message is logged instead).
   */
  setElement: (name, el, option = 'textContent') => {
    const content = document.querySelector(el);
    if (!content) {
      return console.log(`Element for : ${name} not found 404`);
    }

    const value = content[option];
    // A property the element does not have (or a null one) cannot be trimmed;
    // report it like a missing element instead of throwing a TypeError.
    if (value == null) {
      return console.log(`Property '${option}' for : ${name} not found on element`);
    }

    // String() lets non-string properties (numbers, booleans) be stored too.
    return scraper.elements.push({ [name]: String(value).trim() });
  },

  /**
   * Serializes everything collected so far.
   *
   * @returns {string} JSON array of the single-key objects in `scraper.elements`.
   */
  toJson: () => JSON.stringify(scraper.elements),
};
// Signature: scraper.setElement(NAME_OF_THE_KEY, ELEMENT_SELECTOR, PROPERTY_NAME);
// (kept as a comment: the placeholders are not real identifiers and calling
// with them would throw a ReferenceError before the example below runs)
scraper.setElement('title', '#header > span.balancedHeadline', 'textContent');
scraper.toJson();
/**
 * HOW TO USE
 *
 * # Set the elements
 *   scraper.setElement('title', '#header > span.balancedHeadline', 'textContent');
 *   scraper.setElement('image', '#header > img', 'src');
 *
 * # Print the result as JSON
 *   scraper.toJson()
 *   // => '[{"title":"Headline Title"},{"image":"https://aaaa.png"}]'
 *
 * OPTIONS
 *   The third argument is the name of a property on the matched element,
 *   e.g. textContent, src, text (on <a> elements), innerHTML.
 */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment