Created
June 10, 2021 06:58
-
-
Save AggressivelyMeows/641c17d975179befb7dc79ed9b850e75 to your computer and use it in GitHub Desktop.
Worker to grab the meta tags of a site
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Single-line install of CFW-EU (https://www.npmjs.com/package/cfw-easy-utils).
//
// Everything from `var cfweu = {}` to the final `cfweu.secrets=s;` is a minified copy of
// that library pasted inline. It populates the `cfweu` namespace with:
//   - cfweu.response: helpers that build Response objects. `json(body, options)`
//     stringifies a non-string body and resolves to a Response whose Content-Type is
//     "application/json". `options` may carry `headers`, `status` (default 200),
//     `statusText` (default "OK"), `autoCors` (default true), `cookies` and `stopwatch`.
//   - cfweu.secrets: `uuidv4`, `hashPassword` (PBKDF2 with SHA-256; 45000 iterations and a
//     random 8-byte salt unless overridden) and `validatePassword`.
//   - cfweu.Stopwatch, cfweu.Websocket and cfweu.WebsocketResponse classes.
// The worker code that follows this bundle only uses `cfweu.response.json`.
//
// NOTE(review): issues visible in the bundle, left untouched because it is vendored code:
//   - Websocket and WebsocketResponse call `e.createNanoEvents()` in their constructors, but
//     no `e` binding is declared anywhere in this bundle; constructing either presumably
//     throws a ReferenceError unless a global `e` exists. Confirm against the upstream package.
//   - Stopwatch.join() combines two arrays with `+` (which yields a string) and then calls
//     `.sort()` on the result.
//   - response.text() sends the Content-Type "plain/text" rather than "text/plain".
//   - response.injectCors() computes the CORS headers and discards them.
//   - WebsocketResponse's message handler calls JSON.stringify where Websocket's handler
//     calls JSON.parse, so string messages are emitted as the raw event data.
var cfweu = {};function t(e){var t="",s="0123456789abcdef";return new Uint8Array(e).forEach((e=>{t+=s[e>>4]+s[15&e]})),t}const s={uuidv4:()=>([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g,(e=>(e^crypto.getRandomValues(new Uint8Array(1))[0]&15>>e/4).toString(16))),async hashPassword(e,s){var r=(s=s||{}).salt||crypto.getRandomValues(new Uint8Array(8)),n=s.iterations||45e3;"string"==typeof r&&(r=function(e){(e=e.replace(/^0x/,"")).length%2!=0&&console.log("WARNING: expecting an even number of characters in the hexString");var t=e.match(/[G-Z\s]/i);t&&console.log("WARNING: found non-hex characters",t);var s=e.match(/[\dA-F]{2}/gi).map((function(e){return parseInt(e,16)}));return new Uint8Array(s).buffer}(r));const a=new TextEncoder("utf-8").encode(e),i=await crypto.subtle.importKey("raw",a,{name:"PBKDF2"},!1,["deriveBits","deriveKey"]),o=await crypto.subtle.deriveKey({name:"PBKDF2",salt:r,iterations:n,hash:"SHA-256"},i,{name:"AES-CBC",length:256},!0,["encrypt","decrypt"]);return`$PBKDF2;h=${t(await crypto.subtle.exportKey("raw",o))};s=${t(r)};i=${n};`},async validatePassword(e,t){for(var s={salt:t.split(";s=")[1].split(";")[0],iterations:t.split(";i=")[1].split(";")[0]},r=await this.hashPassword(e,s),n=!0,a=0;a<r.length;a++)r.charAt(a)!=t.charAt(a)&&(n=!1);return n}};const r={version:"1.0.0",config:{secretKey:"password",debugHeaders:!1},accessControl:{allowOrigin:"*",allowMethods:"GET, POST, PUT",allowHeaders:"Content-Type"},request:null,_corsHeaders(){var e=this.accessControl.allowOrigin;return this.request&&(e=new URL(this.request.url).origin),{"Access-Control-Allow-Origin":e,"Access-Control-Allow-Methods":this.accessControl.allowMethods,"Access-Control-Max-Age":"1728000"}},injectCors(e,t){this._corsHeaders()},_genericResponse(e,t,s){if(void 0===s)s={};var r=s.headers||{},n=s.status||200,a=s.statusText||"OK",i=s.autoCors;void 0===s.autoCors&&(i=!0);var o=s.cookies||null,c=s.stopwatch||null,h={"Content-Type":e,...r};i&&(h={...h,...this._corsHeaders()});var l=new 
Response(t,{status:n,statusText:a,headers:h});o&&o.values().forEach((e=>{l.headers.append("Set-Cookie",e)}));return c&&l.headers.set("Server-Timing",c.getHeader()),this.config.debugHeaders&&l.headers.set("x-cfw-eu-version",this.version),new Promise((e=>e(l)))},cors(e){return e&&(this.request=e),new Response(null,{headers:this._corsHeaders()})},json(e,t){var s=e;return"string"!=typeof s&&(s=JSON.stringify(e)),this._genericResponse("application/json",s,t)},html(e,t){return this._genericResponse("text/html",e,t)},text(e,t){return this._genericResponse("plain/text",e,t)},fromResponse(e,t){var s={...this.headersToObject(e.headers),...t.headers||{}};"headers"in t&&delete t.headers;var r=JSON.parse(JSON.stringify(s["content-type"]));return"content-type"in s&&delete s["content-type"],this._genericResponse(r,e.body,{headers:s,...t})},async static(e,t){var s=t.baseUrl;if(!s)throw"You need to specify a baseUrl for response.static to work.";let r=new URL(e.url);var n=await fetch(`${s}${r.pathname.replace(t.routePrefix||"<>","")}`,{cf:{cacheTtl:t.ttl||600}});return this.fromResponse(n,t)},websocket:async e=>new Response(null,{status:101,webSocket:e.client}),setHeader(e,t,s){var r=new Response(e.body,e),n=s;return"function"==typeof s.values?(n=s.values()).forEach((e=>{r.headers.append(t,e)})):r.headers.append(t,n),r},headersToObject:e=>Object.fromEntries(e.entries())};cfweu.Stopwatch=class{constructor(){this.start=new Date,this.checkpoints=[{name:"Start",dur:0,desc:"Started recording"}],this.lastCheckpointTime=new Date}mark(e,t){console.log("Last checkpoint: ",this.lastCheckpointTime),console.log(new Date),console.log("Difference: ",new Date-this.lastCheckpointTime),this.checkpoints.push({name:e,time:new Date,dur:new Date-this.lastCheckpointTime,...t}),this.lastCheckpointTime=new Date}getTotalTime(){return new Date-this.start}join(e){var t=this.checkpoints+e.checkpoints;this.checkpoints=t.sort(((e,t)=>t.time-e.time))}getHeader(){var e=[];return this.checkpoints.forEach((t=>{var 
s=[t.name];Object.keys(t).forEach((e=>{"name"!=e&&"time"!=e&&s.push(`${e}=${t[e]}`)})),e.push(s.join(";"))})),e.join(",")}},cfweu.Websocket=class{constructor(t,s){this.url=t,this.options=s,this.emitter=e.createNanoEvents(),this.sendQueue=[],this.url.includes("wss:")&&(this.url=this.url.replace("wss:","https:")),this.url.includes("ws:")&&(this.url=this.url.replace("ws:","http:")),this.connect()}log(e){this.options.logger&&this.options.logger.send(e)}async connect(){var e=await fetch(this.url,{headers:{upgrade:"websocket"}});this.socket=e.webSocket,this.socket.accept(),this.socket.addEventListener("message",(e=>{this.emitter.emit("rawmessage",e);var t=e.data;try{"string"==typeof(t=JSON.parse(t))&&(t=e.data)}catch(e){}this.emitter.emit("message",t)})),this.socket.addEventListener("close",(()=>{this.emitter.emit("close")})),this.sendQueue.forEach((e=>{this.socket.send(e)})),this.sendQueue=[]}on(e,t){return this.emitter.on(e,t)}addEventListener(e,t){return"message"==e&&(e="rawmessage"),this.emitter.on(e,t)}send(e,t){var s=e;e.constructor!=Object&&e.constructor!=Array||(s=JSON.stringify(s)),this.socket?this.socket.send(s):this.sendQueue.push(s)}},cfweu.WebsocketResponse=class{constructor(){let t=new WebSocketPair;this.socket=t[1],this.client=t[0],this.socket.accept(),this.emitter=e.createNanoEvents(),this.session={history:[],startTime:new Date,lastMessageTime:null},this.socket.addEventListener("message",(e=>{var t=e.data;try{"string"==typeof(t=JSON.stringify(t))&&(t=e.data)}catch(e){}this.session.lastMessageTime=new Date,this.emitter.emit("message",t)})),this.socket.addEventListener("close",(()=>this.emitter.emit("close")))}on(e,t){return this.emitter.on(e,t)}send(e,t){var s=e;e.constructor!=Object&&e.constructor!=Array||(s=JSON.stringify(s)),this.socket.send(s)}},cfweu.response=r,cfweu.secrets=s; | |
// Pull the response helpers out of the inlined CFW-EU namespace.
var response = cfweu.response;

// Answer every incoming fetch event with whatever handleRequest resolves to.
addEventListener('fetch', function onFetch(event) {
  event.respondWith(handleRequest(event.request));
});
/**
 * HTMLRewriter element handler that collects `<meta>` tag values.
 *
 * Each tag is keyed by its `property` attribute (Open Graph style), falling
 * back to `name` (e.g. `description`). Tags with neither attribute, such as
 * `<meta charset>`, are skipped instead of being stored under the key "null".
 */
class MetaTagScraper {
  constructor() {
    // A Map keeps page-controlled keys (e.g. "__proto__") away from object prototypes.
    this.tags = new Map();
  }

  /**
   * Called by HTMLRewriter once per matched element.
   * @param {Element} element - the `<meta>` element being streamed
   */
  element(element) {
    const key = element.getAttribute('property') || element.getAttribute('name');
    if (!key) {
      return;
    }
    this.tags.set(key, element.getAttribute('content'));
  }

  /** Collected tags as a plain, JSON-serializable object. */
  get values() {
    return Object.fromEntries(this.tags);
  }
}

/**
 * Parse the caller-supplied target and accept it only if it is an absolute
 * http(s) URL.
 * @param {string|null} raw - value of the `url` query parameter
 * @returns {URL|null} the parsed URL, or null when it is missing or unusable
 */
function parseTargetUrl(raw) {
  if (!raw) {
    return null;
  }
  let parsed;
  try {
    parsed = new URL(raw);
  } catch (err) {
    // An unparsable URL is an expected client error, reported to the caller as a 400.
    return null;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return null;
  }
  return parsed;
}

/**
 * Fetch the page named by the `url` query parameter and return its meta tags.
 *
 * Responds with JSON `{ url, metadata }`, where `metadata` maps each meta
 * tag's `property` (or `name`) to its `content`. Responds with a 400 JSON
 * error when `url` is missing or is not an absolute http(s) URL.
 *
 * @param {Request} request - the incoming worker request
 * @returns {Promise<Response>} JSON response built by `response.json`
 */
async function handleRequest(request) {
  const url = new URL(request.url).searchParams.get('url');
  const target = parseTargetUrl(url);
  if (target === null) {
    return response.json(
      { error: 'Provide an absolute http(s) URL in the "url" query parameter.' },
      { status: 400, statusText: 'Bad Request' },
    );
  }

  // Set the flag through searchParams so an existing query string or fragment
  // on the target stays intact (plain concatenation produced "?a=1?_escaped...").
  target.searchParams.set('_escaped_fragment_', '1');

  const resp = await fetch(target.toString(), {
    headers: {
      // Trick the website into thinking we are a search engine.
      // This may trigger a pre-render or SSR situation which will return our meta tags.
      'user-agent': ' Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    },
  });

  const scraper = new MetaTagScraper();
  // The rewriter only invokes handlers as the body is consumed, so drain it fully.
  await new HTMLRewriter().on('meta', scraper).transform(resp).arrayBuffer();

  return response.json({ url, metadata: scraper.values });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment