Skip to content

Instantly share code, notes, and snippets.

@dexit
Created November 6, 2025 10:39
Show Gist options
  • Select an option

  • Save dexit/f989148d43383c5b666f6e0e787a9b6f to your computer and use it in GitHub Desktop.

Select an option

Save dexit/f989148d43383c5b666f6e0e787a9b6f to your computer and use it in GitHub Desktop.
Logging Table
CREATE TABLE request_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
endpoint TEXT NOT NULL,
method TEXT NOT NULL,
url TEXT NOT NULL,
ip_address TEXT,
user_agent TEXT,
referer TEXT,
response_status_code INTEGER NOT NULL,
response_body_snippet TEXT,
source TEXT
);
// --- CONFIGURATION VIA ENVIRONMENT VARIABLES ---
// In your Worker's Settings -> Variables, set the following:
//
// 1. ALLOWED_ORIGINS (Whitelist for target URLs for the proxy)
// Value: *.example.com, api.another.com, my-domain.net
//
// 2. PROXY_ENDPOINT (The path the proxy will run on)
// Value: /corsproxy/
//
// 3. SEARCH_PARAM (The query parameter for the proxy, e.g., 'url' or 'apiurl')
// Value: apiurl
const HEADERS_TO_STRIP = [
'content-security-policy',
'x-frame-options',
'x-content-type-options',
];
// --- PROXY HELPER FUNCTIONS (Unchanged) ---
function isUrlAllowed(targetUrl, allowedOriginsCommaSeparated) {
if (!allowedOriginsCommaSeparated) return false;
const whitelist = allowedOriginsCommaSeparated.split(',').map(domain => domain.trim());
const hostname = new URL(targetUrl).hostname;
for (const pattern of whitelist) {
if (pattern === '*') return true;
if (pattern.startsWith('*.')) {
const domain = pattern.substring(2);
if (hostname.endsWith('.' + domain) || hostname === domain) return true;
} else {
if (hostname === pattern) return true;
}
}
return false;
}
async function handleProxyOptions(request) {
const corsHeaders = {
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "GET, HEAD, POST, PUT, DELETE, OPTIONS",
"Access-Control-Max-Age": "86400",
};
if (request.headers.get("Origin") !== null && request.headers.get("Access-Control-Request-Method") !== null && request.headers.get("Access-Control-Request-Headers") !== null) {
return new Response(null, {
headers: { ...corsHeaders, "Access-Control-Allow-Headers": request.headers.get("Access-Control-Request-Headers") },
});
} else {
return new Response(null, { headers: { Allow: "GET, HEAD, POST, PUT, DELETE, OPTIONS" } });
}
}
async function handleProxyRequest(request, env) {
const url = new URL(request.url);
const definedSearchParam = env.SEARCH_PARAM || 'apiurl';
const allowedOrigins = env.ALLOWED_ORIGINS;
const targetUrl = url.searchParams.get(definedSearchParam);
if (!targetUrl) {
return new Response(`Error: "${definedSearchParam}" query parameter is missing.`, { status: 400 });
}
if (!isUrlAllowed(targetUrl, allowedOrigins)) {
return new Response(`Error: The requested URL is not in the whitelist.`, { status: 403 });
}
const forwardedRequest = new Request(targetUrl, request);
forwardedRequest.headers.set("Origin", new URL(targetUrl).origin);
let response = await fetch(forwardedRequest);
response = new Response(response.body, response);
response.headers.set("Access-Control-Allow-Origin", url.origin);
response.headers.append("Vary", "Origin");
HEADERS_TO_STRIP.forEach(header => response.headers.delete(header));
return response;
}
// --- POSTCODE LOOKUP FUNCTIONS (Unchanged) ---
const UK_POSTCODE_REGEX = /^([A-Z][A-Z0-9]{2,3}[\s]?[0-9][A-Z]{2})|([A-Z][0-9][A-Z0-9][\s]?[0-9][A-Z]{2})|([A-Z]{2}[0-9]{2}[\s]?[0-9][A-Z]{2})|([A-Z][0-9][\s]?[0-9][A-Z]{2})|([A-Z]{2}[0-9][\s]?[0-9][A-Z]{2})|([A-Z][A-Z0-9]{3}[\s]?[0-9][A-Z]{2})$/i;
async function handlePostcodeRequest(request) {
const { pathname } = new URL(request.url);
const postcode = decodeURIComponent(pathname.split('/')[2] || "").trim();
const corsJsonResponse = (data, httpStatus, jsonStatus, patternResult) => {
const responseBody = { status: jsonStatus, postcode_pattern: patternResult, ...data };
return new Response(JSON.stringify(responseBody), {
status: httpStatus,
headers: {
'Content-Type': 'application/json',
'Access-Control-Allow-Origin': '*',
'Cache-Control': 's-maxage=86400'
},
});
};
if (!postcode) {
return corsJsonResponse({ postcode, error: "Postcode missing. Use format /postcodes/YourPostcode" }, 400, 'error', 'invalid');
}
const isPatternValid = UK_POSTCODE_REGEX.test(postcode);
const patternResult = isPatternValid ? 'valid' : 'invalid';
if (!isPatternValid) {
return corsJsonResponse({ postcode, error: "Invalid UK postcode format." }, 400, 'error', patternResult);
}
const encodedPostcode = encodeURIComponent(postcode);
const fetchHeaders = { 'User-Agent': 'Cloudflare-Worker-Postcode-Aggregator/1.0' };
const activeResponse = await fetch(`https://api.postcodes.io/postcodes/${encodedPostcode}`, { headers: fetchHeaders });
if (activeResponse.ok) {
const data = await activeResponse.json();
return corsJsonResponse({ postcode, source: 'postcodes.io (active)', data: data.result }, 200, 'success', patternResult);
}
if (activeResponse.status === 404) {
const terminatedResponse = await fetch(`https://api.postcodes.io/terminated_postcodes/${encodedPostcode}`, { headers: fetchHeaders });
if (terminatedResponse.ok) {
const data = await terminatedResponse.json();
return corsJsonResponse({ postcode, source: 'postcodes.io (terminated)', error: "Postcode is terminated and no longer in use.", data: data.result }, 400, 'error', patternResult);
}
}
const osmResponse = await fetch(`https://nominatim.openstreetmap.org/search?q=${encodedPostcode}&format=jsonv2&limit=5`, { headers: fetchHeaders });
if (osmResponse.ok) {
const data = await osmResponse.json();
if (data && data.length > 0) {
return corsJsonResponse({ postcode, source: 'openstreetmap', data: data }, 200, 'success', patternResult);
}
}
return corsJsonResponse({ postcode, error: "Postcode not found in any available source." }, 400, 'error', patternResult);
}
// --- NEW: D1 LOGGING AND CLEANUP ---
/**
* Asynchronously logs request and response data to a D1 database.
* This function is designed to not block the response to the client.
*/
async function logToD1(request, response, env, endpoint) {
try {
const { method, url } = request;
const ip_address = request.headers.get('CF-Connecting-IP') || '';
const user_agent = request.headers.get('User-Agent') || '';
const referer = request.headers.get('Referer') || '';
const response_status_code = response.status;
const response_clone = response.clone();
let response_body_snippet = '';
let source = null;
if (endpoint === '/postcodes/') {
const json = await response_clone.json();
source = json.source || null; // Extract source from the JSON body
response_body_snippet = JSON.stringify(json);
} else if (endpoint === '/corsproxy/') {
const text = await response_clone.text();
response_body_snippet = text.substring(0, 512); // Log a larger snippet for the proxy
}
const stmt = env.DB.prepare(
`INSERT INTO request_logs (timestamp, endpoint, method, url, ip_address, user_agent, referer, response_status_code, response_body_snippet, source)
VALUES (datetime('now'), ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
await stmt.bind(endpoint, method, url, ip_address, user_agent, referer, response_status_code, response_body_snippet, source).run();
} catch (e) {
console.error("D1 LOGGING FAILED:", e);
}
}
/**
* Deletes log entries from the D1 database that are older than 30 days.
*/
async function cleanupOldLogs(env) {
try {
console.log("Running scheduled cleanup of old D1 logs...");
const stmt = env.DB.prepare(`DELETE FROM request_logs WHERE timestamp < datetime('now', '-30 days')`);
const { success, meta } = await stmt.run();
if (success) {
console.log(`Successfully deleted ${meta.changes} old log entries.`);
} else {
console.error("Log cleanup failed.", meta);
}
} catch (e) {
console.error("D1 CLEANUP FAILED:", e);
}
}
// --- MAIN ROUTER AND HANDLER ---
export default {
async fetch(request, env, ctx) {
const url = new URL(request.url);
const proxyEndpoint = env.PROXY_ENDPOINT || "/corsproxy/";
let response;
let endpoint = 'other';
// --- Routing Logic ---
if (url.pathname.startsWith(proxyEndpoint)) {
endpoint = '/corsproxy/';
if (request.method === "OPTIONS") {
response = await handleProxyOptions(request);
} else {
response = await handleProxyRequest(request, env);
}
} else if (url.pathname.startsWith("/postcodes/")) {
endpoint = '/postcodes/';
response = await handlePostcodeRequest(request);
} else {
response = new Response("Worker is running. Use the /corsproxy/ or /postcodes/ endpoints.", {
headers: { "Content-Type": "text/plain" },
});
}
// --- Asynchronous Logging ---
// Use ctx.waitUntil to perform logging after the response has been sent.
if (env.DB) {
ctx.waitUntil(logToD1(request, response, env, endpoint));
}
return response;
},
async scheduled(controller, env, ctx) {
// This is triggered by the cron schedule in wrangler.toml
ctx.waitUntil(cleanupOldLogs(env));
}
};
[[d1_databases]]
binding = "DB" # This is the name you'll use in your code (env.DB)
database_name = "your-database-name"
database_id = "your-database-id"
[triggers]
crons = ["0 5 1 * *"] # At 05:00 on day-of-month 1.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment