import base64
import html
import json
import os
import time
from concurrent.futures import as_completed

import requests.exceptions
from chalice import Chalice, Response
from requests import Session
from requests_futures.sessions import FuturesSession
|
|
|
#### Config

# Potential remote sources; every incoming path is probed against each of these.
HOSTS = ['cumulus.asf.alaska.edu', 'sentinel1.asf.alaska.edu']

# If none of the above responds successfully, point the request here:
DEFAULT_HOST = 'archive.asf.alaska.edu'

# How long to keep an accessed URL cached, to avoid rapid retries
CACHE_MINS = 5  # Number of minutes to cache a request

# Cookie to grab metrics
JWT_COOKIE_NAME = 'asf-urs'

# URL cache: maps a request path to {'IAT': <time cached>, 'URI': <url or None>}
PATH_CACHE = {}


def _env_flag(name):
    """Return True when environment variable *name* holds an affirmative value.

    An unset variable, an empty/blank value, and the spellings '0', 'false',
    'no' and 'off' (any case) all count as False. Anything else is True, so
    values such as '1' or 'true' keep enabling the flag as before.
    """
    raw = os.getenv(name)
    if raw is None:
        return False
    return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')


# If true, don't forward unknown requests to DEFAULT_HOST.
# Parsed as a real boolean so that OUTAGE=false / OUTAGE=0 does not
# accidentally switch outage mode on (any non-empty string is truthy).
OUTAGE = _env_flag('OUTAGE')
|
|
|
# Re-try all incoming URLs as HEAD requests, simultaneously, at all HOSTS entries
def find_answer(path, timeout=10):
    """Probe every host in HOSTS for *path* and return the first URL that hits.

    One HEAD request per host is issued concurrently and responses are
    examined in completion order.

    Args:
        path: Request path (no leading slash) appended to each host.
        timeout: Seconds allowed for each HEAD request. Without a timeout a
            single stalled host would block the whole lookup indefinitely.

    Returns:
        The URL of the first response whose status is 200, 302 or 303, or
        False when no host produced such a response.
    """
    print(f"Finding host for {path} the dynamic way")
    with FuturesSession() as session:
        futures = [
            session.head(f'https://{host}/{path}', timeout=timeout)
            for host in HOSTS
        ]
        for future in as_completed(futures):
            try:
                resp = future.result()
            except (requests.exceptions.HTTPError,
                    requests.exceptions.ConnectionError) as E:
                # The exception's request attribute may be None; never let the
                # logging itself raise from inside the handler.
                failed = getattr(E, 'request', None)
                failed_url = failed.url if failed is not None else 'unknown URL'
                print(f"Problem fetching {failed_url}: {E}")
            except Exception as E:
                # Best effort: a failure on one host (including a timeout)
                # must not prevent the other hosts from being considered.
                print(f"Problem:{E}")
            else:
                if resp.status_code in (200, 303, 302):
                    return resp.url
                print (f"{resp.url} with {resp.status_code} (miss!)")

    return False
|
|
|
# Chalice application object; the @app.route handlers in this file register on it.
app = Chalice(app_name="disambiguate")
|
|
|
# Cookie logging for metrics gathering
def get_cookie_username():
    """Return the 'urs-user-id' claim from the request's JWT cookie, if any.

    Reads the 'cookie' header of the current request, looks up the cookie
    named JWT_COOKIE_NAME and decodes the payload (second dot-separated
    segment) of the token. The token is only decoded, not verified.

    Returns:
        The value of the 'urs-user-id' claim, or None when the header or
        cookie is missing or the token cannot be decoded.
    """
    headers = app.current_request.headers
    if 'cookie' not in headers:
        return None

    cookie = headers['cookie']
    try:
        all_cookies = {}
        for pair in cookie.split(';'):
            name, sep, value = pair.strip().partition('=')
            # Skip segments without '=' instead of discarding every cookie.
            if sep:
                all_cookies[name] = value

        if JWT_COOKIE_NAME in all_cookies:
            # Try to decode part of the JWT token to get user id
            payload_b64 = all_cookies[JWT_COOKIE_NAME].split('.')[1]
            # JWT segments are unpadded base64url ('-' and '_' alphabet);
            # restore the padding and decode with the URL-safe alphabet.
            payload_b64 += '=' * (-len(payload_b64) % 4)
            jwt_payload = base64.urlsafe_b64decode(payload_b64)
            return json.loads(jwt_payload)['urs-user-id']

        # Log cookie names only: the values are credentials.
        print (f"No usable {JWT_COOKIE_NAME} cookie found in {sorted(all_cookies)}")
    except Exception as E:
        # Metrics are best effort; a malformed cookie must never fail the
        # request. The raw cookie is deliberately kept out of the log.
        print(f"Could not decode {JWT_COOKIE_NAME} cookie: {E}")
    return None
|
|
|
def json_log(path, response, result, timer, uri=None):
    """Print one JSON log record describing how a request was handled.

    Args:
        path: Requested path.
        response: Status code to record.
        result: Short human-readable outcome label.
        timer: Elapsed handling time to record.
        uri: Redirect target, or None when there is none.

    The record also carries the caller's source IP from the request context
    and, when get_cookie_username() yields a value, that value under "edl".
    """
    record = {
        "sourceIp": app.current_request.context['identity']['sourceIp'],
        "path": path,
        "response": response,
        "timer": timer,
        "redirect": uri,
        "result": result,
    }

    edl_user = get_cookie_username()
    if edl_user:
        record["edl"] = edl_user

    print ( json.dumps( record ) )
|
|
|
def good_path(uri, path, time_in, reuse=False):
    """Log a successful lookup and build the 303 redirect to *uri*.

    Args:
        uri: Redirect target placed in the Location header.
        path: Requested path, used for logging.
        time_in: time.time() value taken when handling started.
        reuse: True when *uri* came from the path cache.

    Returns:
        A 303 Response with an empty body and a private, one-hour
        Cache-Control header.
    """
    elapsed = round(time.time() - time_in, 3)

    outcome = "Successful Redirect"
    if reuse:
        outcome += " W/ URI Cache Hit"
    json_log(path, 303, outcome, elapsed, uri)

    return Response(
        body='',
        headers={'Cache-Control': 'private, max-age=3600', 'Location': uri},
        status_code=303,
    )
|
|
|
# Awful trash formatting... sorry, not sorry.
def bad_path(path, time_in, reuse=False):
    """Build the response for a path that none of the HOSTS could serve.

    Args:
        path: Requested path.
        time_in: time.time() value taken when handling started.
        reuse: True when the miss was answered from the path cache.

    Returns:
        When OUTAGE is set, a 503 HTML page explaining that the file is
        unavailable; otherwise a 303 redirect to the same path on
        DEFAULT_HOST.
    """
    timer = round(time.time() - time_in, 3)
    cache_note = " W/ URI Cache Hit" if reuse else ""

    if OUTAGE:
        json_log(path, 503, "Outage Message" + cache_note, timer)
        title_text = "Sorry, data is currently unavailable"
        pre_format = "border: 3px double silver; background-color: lightgrey; display: inline-block"
        # The path comes straight from the request URL; escape it so it cannot
        # inject markup or script into the page.
        safe_path = html.escape(path)
        body_text = (
            "Due to system maintenance, the file you requested is currently not available: "
            f"<pre style='{pre_format}'>{safe_path}</pre> "
            "<br>Please contact us at <a href='mailto:uso@asf.alaska.edu'>uso@asf.alaska.edu</a> if you have any questions."
        )
        return Response(body=f"<html><title>{title_text}</title><body>{body_text}</body></html>",
                        status_code=503, headers={'Content-Type':'text/html'})

    default_uri = f"https://{DEFAULT_HOST}/{path}"
    # NOTE(review): the log records 404 while the response sent is a 303;
    # presumably this marks the lookup as a miss for metrics - confirm.
    json_log(path, 404, "Default Passthrough" + cache_note, timer, default_uri)
    return Response(body='', headers={'Location': default_uri}, status_code=303)
|
|
|
|
|
@app.route('/')
def root():
    """Serve a static "Nothing Here" HTML page with status 200 for '/'."""
    print ("In root()")
    page = "<html><title>Nothing Here</title><body>Nothing Here 🤷</body></html>"
    return Response(
        body=page,
        status_code=200,
        headers={'Content-Type':'text/html'},
    )
|
|
|
def _purge_expired_paths(now):
    """Delete PATH_CACHE entries that are older than CACHE_MINS as of *now*."""
    cutoff = now - (60 * CACHE_MINS)
    expired = [cached for cached, entry in PATH_CACHE.items() if entry['IAT'] <= cutoff]
    for cached in expired:
        del PATH_CACHE[cached]


@app.route('/{proxy+}', methods=['GET', 'HEAD'])
def dynamic_url():
    """Redirect the requested path to whichever host can serve it.

    A fresh PATH_CACHE entry (hit or miss) is reused. Otherwise
    find_answer() probes the hosts, the outcome is cached for CACHE_MINS
    minutes, and the request is answered through good_path() or bad_path().

    Returns:
        A 404 Response for favicon requests or when the 'proxy' URI
        parameter is missing; otherwise the Response built by good_path()
        or bad_path().
    """
    print (f"In dynamic_url() with {app.current_request.context['identity']['sourceIp']}")

    # Tolerate a missing parameter mapping as well as a missing 'proxy' key.
    uri_params = app.current_request.uri_params or {}
    if 'proxy' not in uri_params:
        print (f"Bad Request: {app.current_request}")
        return Response(body="<html><title>?</title><body>????<br>???? - ??<br>?????</body></html>", status_code=404, headers={})

    path = uri_params['proxy']

    # for timing sake
    request_start = time.time()

    # We don't support favicon
    if 'favicon.ico' in path:
        print ('Ignoring favicon.')
        return Response(body='', status_code=404)

    # Check the path cache
    cached = PATH_CACHE.get(path)
    if cached is not None and cached['IAT'] > time.time()-(60*CACHE_MINS):
        if cached['URI'] is not None:
            print(f"Re-using cached response for {path} -> {cached['URI']}")
            return good_path(cached['URI'], path, request_start, True)
        return bad_path(path, request_start, True)

    # Cache miss: drop expired entries first so that paths which are never
    # requested again do not accumulate for the life of the process.
    _purge_expired_paths(time.time())

    uri = find_answer(path)
    elapsed = round(time.time() - request_start,3)

    if uri:
        print(f"It took {elapsed} seconds to map {path} to {uri}")
        print (f"Redirecting to {uri}, caching url for {CACHE_MINS} Minutes")
        PATH_CACHE[path] = { 'IAT': time.time(), 'URI': uri }
        return good_path(uri, path, request_start, False)

    print(f"Wasted {elapsed} seconds failing to find a uri for {path}")
    # Cache the miss too, so repeated requests do not re-probe every host.
    PATH_CACHE[path] = { 'IAT': time.time(), 'URI': None }
    return bad_path(path, request_start)