Skip to content

Instantly share code, notes, and snippets.

@pfeilbr
Created December 2, 2018 01:25
Show Gist options
  • Save pfeilbr/109019f3cfb50dd4738341819dff5361 to your computer and use it in GitHub Desktop.
Save pfeilbr/109019f3cfb50dd4738341819dff5361 to your computer and use it in GitHub Desktop.
Parses AWS SlideShare pages for AWS re:Invent presentations and generates a Markdown-formatted list of links with titles.
// run in browser (tested in chrome as snippet)
(async () => {
// Prefix of every listing-page URL; the page number is appended directly to it.
const baseURL = 'https://www.slideshare.net/AmazonWebServices/presentations/';
// Number of the listing page to request next (the crawl starts at page 1).
let page = 1;
/**
 * Creates a promise that is fulfilled once a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Fulfils without a value after the delay.
 */
const sleep = async (ms) => {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
};
/**
 * Downloads one listing page, parses it as HTML and collects the links whose
 * `title` attribute contains "- AWS re:Invent 2018".
 *
 * Needs the browser globals `fetch` and `DOMParser`.
 *
 * @param {string} url - Address of the listing page to download.
 * @returns {Promise<Array<{title: string, url: string}>>} One entry per
 *   matching anchor, in document order; empty when nothing matches.
 */
const fetchItems = async (url) => {
  const matches = [];
  const domParser = new DOMParser();
  const response = await fetch(url);
  const markup = await response.text();
  const listingDoc = domParser.parseFromString(markup, "text/html");
  // Each presentation in the listing is an anchor nested under #slideshows.
  const anchors = listingDoc.querySelectorAll('#slideshows > ul > li > strong > a');
  for (const anchor of anchors) {
    // Skip anything that is not tagged as a re:Invent 2018 deck.
    if (!anchor.title.match(`- AWS re:Invent 2018`)) {
      continue;
    }
    matches.push({ title: anchor.title, url: anchor.href });
  }
  return matches;
};
// Walk the numbered listing pages in order, accumulating every matching
// presentation, until a page contributes no matching items.
// NOTE(review): the stop condition is "no re:Invent 2018 matches on this
// page", not "no presentations on this page", so a listing page made up
// entirely of other decks also ends the crawl. Confirm that is acceptable.
let allItems = [];
while (true) {
  const items = await fetchItems(`${baseURL}${page}`);
  if (items.length === 0) {
    // Nothing to add and no further request to make, so skip the pause.
    break;
  }
  allItems = allItems.concat(items);
  page += 1;
  // The await is what actually spaces the requests out: without it the
  // timer promise is created and dropped, and the next fetch starts at once.
  await sleep(200);
}
// Sort key for a title: the text from its first "(" through its last ")"
// (the pattern is greedy), or "" when the title has no parenthesised part.
// `match` returns null on a miss, so the result must be checked before
// indexing into it.
const parenthesisedPart = (title) => {
  const found = title.match(/\(.*\)/);
  return found === null ? '' : found[0];
};
// Orders the collected items by that key using plain string comparison.
// `sort` works in place, so `allItemsSorted` is the same array object as
// `allItems`. Titles without a parenthesised part sort first.
const allItemsSorted = allItems.sort((a, b) => {
  const keyA = parenthesisedPart(a.title);
  const keyB = parenthesisedPart(b.title);
  if (keyA === keyB) {
    // Equal keys must compare as 0 for the comparator to be consistent.
    return 0;
  }
  return keyA > keyB ? 1 : -1;
});
//console.log(JSON.stringify(allItemsSorted, null, 2))
// Turn every item into a Markdown bullet of the form "* [title](url)",
// join the bullets with newlines and print the result to the console.
const bulletLines = [];
for (const entry of allItemsSorted) {
  bulletLines.push(`* [${entry.title}](${entry.url})`);
}
const markdownList = bulletLines.join("\n");
console.log(markdownList);
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment