Skip to content

Instantly share code, notes, and snippets.

@bbuechler
Last active December 22, 2022 19:17
Show Gist options
  • Select an option

  • Save bbuechler/c97f7f9091f2794fd5211da428f54936 to your computer and use it in GitHub Desktop.

Select an option

Save bbuechler/c97f7f9091f2794fd5211da428f54936 to your computer and use it in GitHub Desktop.
Lambda URL Disambiguator

Cloud-based URL disambiguator

(essentially a distributed load-balancer)

There needs to be an API Gateway w/ a /{proxy+} GET method.

Important Config

  • HOSTS List hosts where a request may be serviced
HOSTS = [ 'host1.path.com', 'host2.path.com', ... 'hostX.path.com']
  • If the above don't respond successfully, send the request here:
DEFAULT_HOST = 'failover.path.com'
from requests import Session
import requests.exceptions
from requests_futures.sessions import FuturesSession
from concurrent.futures import as_completed
from chalice import Chalice, Response
import json
import time
import os
import base64
#### Config
# Candidate remote hosts; each one is probed with a HEAD request for the path.
HOSTS = ['cumulus.asf.alaska.edu', 'sentinel1.asf.alaska.edu']
# If the above don't respond successfully, send the request here:
DEFAULT_HOST = 'archive.asf.alaska.edu'
# How long to keep an accessed url cached to avoid rapid retry
CACHE_MINS = 5  # Number of minutes to cache a request
# Cookie to grab metrics (its JWT payload carries the user id that gets logged)
JWT_COOKIE_NAME = 'asf-urs'
# URL cache: path -> {'IAT': time the entry was stored, 'URI': resolved URL or None}
PATH_CACHE = {}
# If true, don't forward unknown requests to DEFAULT_HOST.
# Environment variables are always strings, so a raw os.getenv() value would
# treat OUTAGE=false or OUTAGE=0 as "on". Explicit "off" spellings (and an
# unset/empty variable) now disable the outage page; any other value enables it.
OUTAGE = (os.getenv('OUTAGE') or '').strip().lower() not in ('', '0', 'false', 'no', 'off')
# Re-try all incoming URLS as HEAD requests, simultaneous, at all HOSTS entries
def find_answer(path, timeout=10):
    """Probe every host in HOSTS for ``path`` and return the first URL that answers.

    A HEAD request for ``https://<host>/<path>`` is issued to all hosts at
    once; responses are examined in completion order.

    Args:
        path: Request path (without a leading slash) to look for on each host.
        timeout: Seconds to wait on each probe before giving up on that host.
            Without a timeout a single unresponsive host could stall the
            whole lookup.

    Returns:
        The URL of the first response whose status is 200, 302 or 303, or
        False when no host produced such a response.
    """
    print(f"Finding host for {path} the dynamic way")
    with FuturesSession() as session:
        # Remember which URL each future is probing so failures can be logged
        # even when the raised exception carries no request object.
        probes = {}
        for host in HOSTS:
            url = f'https://{host}/{path}'
            probes[session.head(url, timeout=timeout)] = url

        for future in as_completed(probes):
            url = probes[future]
            try:
                resp = future.result()
            except requests.exceptions.RequestException as E:
                # Covers connection failures and timeouts alike.
                print(f"Problem fetching {url}: {E}")
                continue
            except Exception as E:
                # Best effort: one misbehaving host must not abort the others.
                print(f"Problem:{E}")
                continue

            if resp.status_code in (200, 303, 302):
                return resp.url
            print (f"{resp.url} with {resp.status_code} (miss!)")
    return False
# Chalice application object; the route decorators and the current-request
# lookups in the functions below all go through it.
app = Chalice(app_name='disambiguate')
# Cookie logging for metrics gathering
def get_cookie_username():
    """Return the ``urs-user-id`` claim from the JWT cookie of the current request.

    The cookie named by JWT_COOKIE_NAME is expected to hold a dot-separated
    JWT; only its second segment (the payload) is decoded. The signature is
    NOT verified, so the value is suitable for logging/metrics only.

    Returns:
        The user id found in the payload, or None when there is no cookie
        header, the cookie is absent, or the payload cannot be decoded.
    """
    headers = app.current_request.headers
    if 'cookie' not in headers:
        return None

    cookie = headers['cookie']
    try:
        # Parse "name=value" pairs leniently: a fragment without "=" is
        # skipped instead of discarding every other cookie with it.
        all_cookies = {}
        for fragment in cookie.split(";"):
            name, sep, value = fragment.strip().partition("=")
            if sep:
                all_cookies[name] = value

        if JWT_COOKIE_NAME in all_cookies:
            # Try to decode part of the JWT token to get user id
            payload_b64 = all_cookies[JWT_COOKIE_NAME].split('.')[1]
            # JWT segments are base64url without padding: restore the padding
            # and use the URL-safe alphabet so '-' and '_' decode correctly.
            payload_b64 += '=' * (-len(payload_b64) % 4)
            jwt_payload = base64.urlsafe_b64decode(payload_b64)
            return json.loads(jwt_payload)['urs-user-id']

        # Log cookie names only; the values may be live session tokens.
        print (f"No usable {JWT_COOKIE_NAME} cookie found in {sorted(all_cookies)}")
    except Exception as E:
        # Best effort: a bad cookie must never break the redirect itself.
        print(f"Could not decode {JWT_COOKIE_NAME} cookie: {E}")
    return None
def json_log(path, response, result, timer, uri=None):
    """Print one JSON log record describing how the current request was handled.

    Args:
        path: Requested path.
        response: Status code to record.
        result: Human-readable outcome text.
        timer: Elapsed handling time.
        uri: Redirect target, if any.

    The record also carries the caller's source IP and, when
    get_cookie_username() returns a value, that value under "edl".
    """
    source_ip = app.current_request.context['identity']['sourceIp']
    record = dict(
        sourceIp=source_ip,
        path=path,
        response=response,
        timer=timer,
        redirect=uri,
        result=result,
    )

    edl_user = get_cookie_username()
    if edl_user:
        record["edl"] = edl_user

    print ( json.dumps( record ) )
def good_path(uri, path, time_in, reuse=False):
    """Log a successful lookup and return a 303 redirect to ``uri``.

    Args:
        uri: Redirect target.
        path: Requested path (used for logging).
        time_in: time.time() value taken when handling started.
        reuse: True when ``uri`` came from the path cache.
    """
    elapsed = round(time.time() - time_in, 3)

    outcome = "Successful Redirect"
    if reuse:
        outcome += " W/ URI Cache Hit"
    json_log(path, 303, outcome, elapsed, uri)

    return Response(
        body='',
        headers={'Cache-Control': 'private, max-age=3600', 'Location': uri},
        status_code=303,
    )
# Response for paths that could not be resolved: outage page or default redirect.
def bad_path(path, time_in, reuse=False):
    """Log an unresolved lookup and return the fallback response.

    When OUTAGE is set, a 503 HTML maintenance page is returned; otherwise
    the client is redirected (303) to the same path on DEFAULT_HOST.

    Args:
        path: Requested path.
        time_in: time.time() value taken when handling started.
        reuse: True when the negative result came from the path cache.
    """
    from html import escape  # local import: only needed for the outage page

    timer = round(time.time() - time_in, 3)
    cache_note = " W/ URI Cache Hit" if reuse else ""

    if OUTAGE:
        json_log(path, 503, "Outage Message" + cache_note, timer)
        title_text = "Sorry, data is currently unavailable"
        pre_format = "border: 3px double silver; background-color: lightgrey; display: inline-block"
        # The path comes straight from the request URL, so it must be escaped
        # before being embedded in HTML (otherwise it is a reflected-XSS hole).
        body_text = (
            "Due to system maintenance, the file you requested is currently not available: "
            f"<pre style='{pre_format}'>{escape(path)}</pre> "
            "<br>Please contact us at <a href='mailto:uso@asf.alaska.edu'>uso@asf.alaska.edu</a> if you have any questions."
        )
        return Response(body=f"<html><title>{title_text}</title><body>{body_text}</body></html>",
                        status_code=503, headers={'Content-Type':'text/html'})

    default_uri = f"https://{DEFAULT_HOST}/{path}"
    # NOTE(review): logged as 404 although the client receives a 303;
    # presumably this records the lookup outcome rather than the HTTP status.
    json_log(path, 404, "Default Passthrough" + cache_note, timer, default_uri)
    return Response(body='', headers={'Location': default_uri}, status_code=303)
@app.route('/')
def root():
    """Serve a static placeholder page for requests to the bare root URL."""
    print ("In root()")
    page = "<html><title>Nothing Here</title><body>Nothing Here 🤷</body></html>"
    return Response(body=page, status_code=200, headers={'Content-Type':'text/html'})
@app.route('/{proxy+}', methods=['GET', 'HEAD'])
def dynamic_url():
    """Resolve the requested path to a host and answer with a redirect.

    Flow: reject requests without a ``proxy`` URI parameter, ignore favicon
    requests, serve a still-fresh PATH_CACHE entry if one exists, otherwise
    ask find_answer() and cache whatever it reports (hit or miss).
    """
    request = app.current_request
    print (f"In dynamic_url() with {request.context['identity']['sourceIp']}")

    if 'proxy' not in request.uri_params:
        print (f"Bad Request: {request}")
        return Response(body="<html><title>?</title><body>????<br>???? - ??<br>?????</body></html>", status_code=404, headers={})

    path = request.uri_params['proxy']
    # for timing sake
    request_start = time.time()

    # We don't support favicon
    if 'favicon.ico' in path:
        print ('Ignoring favicon.')
        return Response(body='', status_code=404)

    # Check the path cache; entries older than CACHE_MINS are ignored.
    if path in PATH_CACHE and PATH_CACHE[path]['IAT'] > time.time()-(60*CACHE_MINS):
        cached_uri = PATH_CACHE[path]['URI']
        if cached_uri is None:
            # A cached miss: skip re-probing the hosts.
            return bad_path(path, request_start, True)
        print(f"Re-using cached response for {path} -> {cached_uri}")
        return good_path(cached_uri, path, request_start, True)

    uri = find_answer(path)
    elapsed = round(time.time() - request_start,3)

    if not uri:
        print(f"Wasted {elapsed} seconds failing to find a uri for {path}")
        PATH_CACHE[path] = { 'IAT': time.time(), 'URI': None }
        return bad_path(path, request_start)

    print(f"It took {elapsed} seconds to map {path} to {uri}")
    print (f"Redirecting to {uri}, caching url for {CACHE_MINS} Minutes")
    PATH_CACHE[path] = { 'IAT': time.time(), 'URI': uri }
    return good_path(uri, path, request_start, False)
requests==2.24.0
requests-futures==1.0.0
chalice==1.18.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment