Skip to content

Instantly share code, notes, and snippets.

@motyar
Created September 18, 2021 13:20
Show Gist options
  • Save motyar/acc8c210f5d442e88c80cadae950a741 to your computer and use it in GitHub Desktop.
Save motyar/acc8c210f5d442e88c80cadae950a741 to your computer and use it in GitHub Desktop.
"Meta tag scraper" JSON API built with @cloudflare Workers, live at https://meta.motyar.workers.dev. Twitter thread: https://twitter.com/motyar/status/1439208342711197696
/**
 * Serialize `payload` as JSON and wrap it in a Response carrying the
 * worker's JSON content-type header.
 *
 * @param {object} payload - Value to serialize as the response body.
 * @param {number} [status=200] - HTTP status code of the response.
 * @returns {Response} The JSON response.
 */
function jsonResponse(payload, status = 200) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: {
      "content-type": "application/json;charset=UTF-8",
    },
  });
}

/**
 * Scrape the `<meta>` tags (and, as a fallback, the `<title>`) of the page
 * named in the request path and return them as a flat JSON object.
 *
 * The target page is everything after the first `/` of the request URL
 * (path plus query string), e.g. `https://<worker-host>/example.com`.
 * `http://` is prepended when the target carries no http(s) scheme.
 *
 * @param {Request} request - Incoming request; only `request.url` is read.
 * @returns {Promise<Response>} JSON object mapping each meta tag's `name`
 *   (or `property`) to its `content`, or `{ error }` when the target is
 *   missing (default status) or cannot be fetched/read (status 502).
 */
async function handleRequest(request) {
  // Derive the target from the parsed URL rather than stripping one
  // hard-coded origin, so the worker behaves the same on any host or scheme.
  const { pathname, search } = new URL(request.url);
  let inputUrl = `${pathname}${search}`.slice(1);

  // Return an error if the url is not passed.
  if (!inputUrl) {
    return jsonResponse({
      error: "Input URL missing! Pass it like https://meta.motyar.workers.dev/example.com",
    });
  }

  // Add http if missing. Test for a real scheme prefix: a bare
  // startsWith("http") would wrongly skip hosts such as "httpbin.org".
  if (!/^https?:\/\//i.test(inputUrl)) {
    inputUrl = `http://${inputUrl}`;
  }

  // Null-prototype dictionary: keys come from untrusted markup.
  const meta = Object.create(null);

  try {
    const response = await fetch(inputUrl);

    const rewritten = new HTMLRewriter()
      .on("meta", {
        element(e) {
          const key = e.getAttribute("name") || e.getAttribute("property");
          if (key) {
            meta[key] = e.getAttribute("content");
          }
        },
      })
      .transform(response.clone());

    // HTMLRewriter handlers only fire while the transformed body is being
    // read, so drain it completely before looking at `meta`.
    await rewritten.arrayBuffer();

    // If title is missing in meta tags, get the title tag using regex.
    if (!meta["title"]) {
      const html = await response.text();
      const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
      if (titleMatch) {
        meta["title"] = titleMatch[1].trim();
      }
    }
  } catch (err) {
    // Invalid target URL, network failure or unreadable body: answer with
    // a JSON error instead of letting the worker throw.
    return jsonResponse({ error: `Could not fetch ${inputUrl}: ${err.message}` }, 502);
  }

  return jsonResponse(meta);
}
// Register the worker's fetch listener: each incoming request is answered
// with whatever handleRequest resolves to.
addEventListener("fetch", (fetchEvent) => {
  const pendingResponse = handleRequest(fetchEvent.request);
  return fetchEvent.respondWith(pendingResponse);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment