Created
September 18, 2021 13:20
-
-
Save motyar/acc8c210f5d442e88c80cadae950a741 to your computer and use it in GitHub Desktop.
"Meta tag scraper" JSON API using @cloudflare workers. https://meta.motyar.workers.dev Twitter thread here https://twitter.com/motyar/status/1439208342711197696
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scrape the <meta> tags (and, as a fallback, the <title>) of the page named
 * in the request path and return them as a flat JSON object.
 *
 * The target page is whatever follows the first "/" of the worker URL, e.g.
 * `https://<worker-host>/example.com` scrapes `http://example.com`.
 *
 * @param {Request} request - Incoming request; only `request.url` is read.
 * @returns {Promise<Response>} JSON response holding the collected
 *   name/property -> content map, or `{ "error": "..." }` when no target was
 *   given (status 200, as before) or the target could not be fetched (502).
 */
async function handleRequest(request) {
  const jsonHeaders = { "content-type": "application/json;charset=UTF-8" };
  const json = (payload, status = 200) =>
    new Response(JSON.stringify(payload), { status, headers: jsonHeaders });

  // Read the target from path + query instead of stripping one hard-coded
  // origin, so the worker behaves the same on any host or scheme.
  const { pathname, search } = new URL(request.url);
  let inputUrl = pathname.slice(1) + search;

  // Return an error if the URL is not passed.
  if (!inputUrl) {
    return json({
      error: "Input URL missing! Pass it like https://meta.motyar.workers.dev/example.com",
    });
  }

  // Add a scheme if missing. Test for the full "http(s)://" prefix so hosts
  // that merely start with "http" (e.g. httpbin.org) still get one.
  if (!/^https?:\/\//i.test(inputUrl)) {
    inputUrl = `http://${inputUrl}`;
  }

  let response;
  try {
    response = await fetch(inputUrl);
  } catch (err) {
    // Invalid URL, DNS failure, etc.: answer with JSON instead of crashing.
    return json({ error: `Could not fetch ${inputUrl}: ${err.message}` }, 502);
  }

  const meta = {};
  const rewriter = new HTMLRewriter().on("meta", {
    element(e) {
      const key = e.getAttribute("name") || e.getAttribute("property");
      if (key) {
        meta[key] = e.getAttribute("content");
      }
    },
  });
  // HTMLRewriter is lazy: handlers fire only while the transformed body is
  // being read, so drain it to make sure every <meta> tag has been visited.
  await rewriter.transform(response.clone()).arrayBuffer();

  // If no meta tag supplied a title, fall back to the <title> element.
  if (!meta["title"]) {
    const html = await response.text();
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    if (titleMatch) {
      meta["title"] = titleMatch[1].trim();
    }
  }

  return json(meta);
}
// Register the worker's fetch listener: every incoming request is answered
// with whatever handleRequest resolves to.
addEventListener("fetch", function onFetch(fetchEvent) {
  const pendingReply = handleRequest(fetchEvent.request);
  return fetchEvent.respondWith(pendingReply);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment