This script calculates updated launch site information for SondeHub using historical data from the last year

import requests
import json
import math
from datetime import datetime, timedelta
import numpy as np
import boto3
from botocore import UNSIGNED
from botocore.config import Config

launch_site_url = 'https://api.v2.sondehub.org/sites'
binned_data_bucket = 'sondehub-history'

# Sonde type name -> rs_types code used in SondeHub site records
rs_types_lookup = {
    "LMS6-403": "11",
    "DFM-09": "17",
    "DFM-06": "18",
    "MRZ-N1": "19",
    "RS-11G": "22",
    "RS41": "41",
    "iMet-4": "34",
    "iMS-100": "35",
    "RS92-NGP": "52",
    "DFM-17": "54",
    "MRZ-3MK": "62",
    "M20": "63",
    "M10": "77",
    "LMS6-1680": "82",
    "iMet-54": "84",
    "iMet-1": "07",
    "PAZA-12M": "15",
    "PAZA-22": "16",
    "MK3": "20",
    "1524LA LORAN-C/GL5000": "21",
    "SRS-C34": "26",
    "AVK-MRZ": "27",
    "AVK–AK2-02": "28",
    "MARZ2-2": "29",
    "RS2-80": "30",
    "GTS1-2/GFE(L)": "33",
    "CF-06": "45",
    "AVK-BAR": "58",
    "M2K2-R": "59",
    "AVK-RZM-2": "68",
    "MARL-A/Vektor-M-RZM-2": "69",
    "MARL-A": "73",
    "RS90": "78",
    "RS92": "80",
    "MARL-A/Vektor-M-MRZ": "88",
    "MARL-A/Vektor-M-BAR": "89",
    "iMet-2": "99"
}

# Configure S3 bucket
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
paginator = s3.get_paginator('list_objects_v2')

now = datetime.now()

def round_time(dt=None, roundTo=60):
    """Round a datetime object to any time lapse in seconds
    dt : datetime.datetime object, default now.
    roundTo : Closest number of seconds to round to, default 1 minute.
    Author: Thierry Husson 2012 - Use it as you want but don't blame me.
    """
    if dt is None:
        dt = datetime.now()
    seconds = (dt.replace(tzinfo=None) - dt.min).seconds
    rounding = (seconds + roundTo / 2) // roundTo * roundTo
    return dt + timedelta(0, rounding - seconds, -dt.microsecond)
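
# Illustrative check: rounding to the nearest 3 hours should snap 13:47 up
# to 15:00 (the date here is arbitrary).
assert round_time(datetime(2022, 1, 1, 13, 47), 60 * 60 * 3) == datetime(2022, 1, 1, 15, 0)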

# Get atmospheric density at altitude
def get_density(altitude):
    # Constants
    airMolWeight = 28.9644   # Molecular weight of air
    densitySL = 1.225        # Density at sea level [kg/m3]
    pressureSL = 101325      # Pressure at sea level [Pa]
    temperatureSL = 288.15   # Temperature at sea level [deg K]
    gamma = 1.4
    gravity = 9.80665        # Acceleration of gravity [m/s2]
    tempGrad = -0.0065       # Temperature gradient [deg K/m]
    RGas = 8.31432           # Gas constant [kg/Mol/K]
    R = 287.053
    deltaTemperature = 0.0

    # Lookup Tables
    altitudes = [0, 11000, 20000, 32000, 47000, 51000, 71000, 84852]
    pressureRels = [
        1,
        2.23361105092158e-1,
        5.403295010784876e-2,
        8.566678359291667e-3,
        1.0945601337771144e-3,
        6.606353132858367e-4,
        3.904683373343926e-5,
        3.6850095235747942e-6,
    ]
    temperatures = [288.15, 216.65, 216.65, 228.65, 270.65, 270.65, 214.65, 186.946]
    tempGrads = [-6.5, 0, 1, 2.8, 0, -2.8, -2, 0]
    gMR = gravity * airMolWeight / RGas

    # Pick a region to work in
    i = 0
    if altitude > 0:
        while altitude > altitudes[i + 1]:
            i = i + 1

    # Lookup based on region
    baseTemp = temperatures[i]
    tempGrad = tempGrads[i] / 1000.0
    pressureRelBase = pressureRels[i]
    deltaAltitude = altitude - altitudes[i]
    temperature = baseTemp + tempGrad * deltaAltitude

    # Calculate relative pressure
    if math.fabs(tempGrad) < 1e-10:
        pressureRel = pressureRelBase * math.exp(
            -1 * gMR * deltaAltitude / 1000.0 / baseTemp
        )
    else:
        pressureRel = pressureRelBase * math.pow(
            baseTemp / temperature, gMR / tempGrad / 1000.0
        )

    # Add temperature offset
    temperature = temperature + deltaTemperature

    # Finally, work out the density...
    speedOfSound = math.sqrt(gamma * R * temperature)
    pressure = pressureRel * pressureSL
    density = densitySL * pressureRel * temperatureSL / temperature
    return density
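
# Illustrative checks against standard-atmosphere values: ~1.225 kg/m^3 at
# sea level and ~0.364 kg/m^3 at the 11 km tropopause.
assert abs(get_density(0) - 1.225) < 1e-9
assert abs(get_density(11000) - 0.3639) < 1e-3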

# Get sea level descent rate
def get_descent_rate(descent_rate, altitude):
    rho = get_density(altitude)
    return math.sqrt((rho / 1.225) * math.pow(descent_rate, 2))
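
# Illustrative check: a 6 m/s descent at 5 km (where the model above gives
# ~0.736 kg/m^3) corresponds to roughly 4.65 m/s at sea level. vel_v is
# negative during descent, matching how this is called below.
assert abs(get_descent_rate(-6.0, 5000) - 4.65) < 0.05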

# Extract the flight date from a history key, which looks like
# 'launchsites/<site>/<YYYY>/<MM>/<DD>/<serial>.json'
def check_flight_age(date):
    date = date.split('/', 1)[1]
    date = date.split('/', 1)[1]
    date = date.rsplit('/', 1)[0]
    date = datetime.strptime(date, '%Y/%m/%d')
    return date

# Get flight serial from URL
def get_flight_serial(key):
    serial = key.rsplit('/', 1)[1]
    serial = serial.split('.json', 1)[0]
    return serial
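
# Illustrative checks; the site id and serial in this key are made up, but
# the path layout matches what the two parsers above expect.
_example_key = 'launchsites/94672/2022/04/01/S1234567.json'
assert check_flight_age(_example_key) == datetime(2022, 4, 1)
assert get_flight_serial(_example_key) == 'S1234567'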

# Download flight summary given key
def get_flight_data(key):
    data = s3.get_object(Bucket=binned_data_bucket, Key=key)
    return json.loads(data['Body'].read().decode('utf-8'))

# Get burst altitude from the [first, burst, last] summary frames
def check_flight_burst(data):
    initial = data[0]['alt']
    burst = data[1]['alt']
    final = data[2]['alt']
    if burst < 10000:
        return None
    if burst < initial or burst < final:
        return None
    return burst
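
# Illustrative checks with synthetic [first, burst, last] frames: a 31 km
# burst is accepted, while anything below 10 km is rejected.
assert check_flight_burst([{'alt': 200.0}, {'alt': 31000.0}, {'alt': 1500.0}]) == 31000.0
assert check_flight_burst([{'alt': 200.0}, {'alt': 8000.0}, {'alt': 1500.0}]) is None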

# Get frequency from data (MHz); reject flights that drift more than 2 kHz
def check_flight_frequency(data):
    frequency = data[0]['frequency']
    for entry in data:
        if abs(entry['frequency'] - frequency) > 0.002:
            return None
    return round(frequency, 2)
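
# Illustrative checks: 1 kHz of drift is within tolerance, a 1.5 MHz jump
# is not (synthetic values).
assert check_flight_frequency([{'frequency': 404.501}, {'frequency': 404.5}]) == 404.5
assert check_flight_frequency([{'frequency': 404.5}, {'frequency': 403.0}]) is None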

# Get sonde type from data
def check_flight_type(data):
    sonde_type = data[0]['type']
    for entry in data:
        if entry['type'] != sonde_type:
            return None
    return sonde_type

# Get ascent rate from data
def check_flight_ascent(data):
    initial = data[0]
    burst = data[1]
    height = burst['alt'] - initial['alt']
    if height < 1000:
        return None
    initial_date = datetime.strptime(initial['time_received'], '%Y-%m-%dT%H:%M:%S.%fZ')
    burst_date = datetime.strptime(burst['time_received'], '%Y-%m-%dT%H:%M:%S.%fZ')
    # total_seconds() also covers (bad) timestamps more than a day apart
    difference = (burst_date - initial_date).total_seconds()
    if difference < 60:
        return None
    ascent_rate = height / difference
    return ascent_rate
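
# Illustrative check: 10.8 km gained over one hour is a 3 m/s ascent.
_example_ascent = [
    {'alt': 500.0, 'time_received': '2022-01-01T00:10:00.000000Z'},
    {'alt': 11300.0, 'time_received': '2022-01-01T01:10:00.000000Z'},
]
assert check_flight_ascent(_example_ascent) == 3.0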

# Get sea level equivalent descent rate from data
def check_flight_descent(data):
    burst = data[1]
    final = data[2]
    if final['alt'] > burst['alt']:
        return None
    if 'vel_v' not in final:
        return None
    if final['vel_v'] > 0:
        return None
    # Only trust descent rates observed below 12 km
    if final['alt'] > 12000:
        return None
    descent = get_descent_rate(final['vel_v'], final['alt'])
    return descent

# Get estimated launch time from data by extrapolating the ascent back to
# the ground and rounding to the nearest 3 hours
def check_flight_launch(data):
    initial = data[0]
    burst = data[1]
    height = burst['alt'] - initial['alt']
    if height < 1000:
        return None
    initial_date = datetime.strptime(initial['time_received'], '%Y-%m-%dT%H:%M:%S.%fZ')
    burst_date = datetime.strptime(burst['time_received'], '%Y-%m-%dT%H:%M:%S.%fZ')
    # total_seconds() also covers (bad) timestamps more than a day apart
    difference = (burst_date - initial_date).total_seconds()
    if difference < 60:
        return None
    ascent_rate = height / difference
    ascent_time = initial['alt'] / ascent_rate
    launch_date = initial_date - timedelta(seconds=ascent_time)
    launch_date = round_time(launch_date, 60 * 60 * 3)
    return launch_date
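
# Illustrative check: a 3 m/s ascent first heard at 500 m at 00:10
# extrapolates back to a launch at 00:00, the nearest 3-hour slot.
_example_flight = [
    {'alt': 500.0, 'time_received': '2022-01-01T00:10:00.000000Z'},
    {'alt': 11300.0, 'time_received': '2022-01-01T01:10:00.000000Z'},
]
assert check_flight_launch(_example_flight) == datetime(2022, 1, 1, 0, 0)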

# Check for launch time patterns given an array of launch datetimes (newest
# first); a synoptic hour qualifies if launches occurred on >80% of days
def check_launch_times(times):
    launches = []
    first = times[-1]
    last = times[0]
    days = (last - first).days
    hours = [0, 3, 6, 9, 12, 15, 18, 21]
    for hour in hours:
        current = first.replace(hour=hour)
        count = 0
        while last > current:
            if current in times:
                count += 1
            current = current + timedelta(days=1)
        if count > days * 0.8:
            launches.append("0:{0:02d}:00".format(hour))
    return launches
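
# Illustrative check: ten days of synthetic 00Z and 12Z launches (newest
# first, matching how launch times are accumulated below) are both
# recognised as regular launch times.
_example_times = [datetime(2022, 1, 1) + timedelta(days=d, hours=h)
                  for d in range(10) for h in (0, 12)]
_example_times.reverse()
assert check_launch_times(_example_times) == ['0:00:00', '0:12:00']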

# Convert string fields to floats
def clean_data(data):
    for point in data:
        if 'alt' in point:
            point['alt'] = float(point['alt'])
        if 'vel_v' in point:
            point['vel_v'] = float(point['vel_v'])
    return data

# Remove outliers from a numpy array using deviation from the median
def reject_outliers(data, m=2.):
    if data.size == 0:
        return data
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    # A zero deviation means every point equals the median; keep them all
    s = d / mdev if mdev else np.zeros(len(d))
    return data[s < m]
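
# Illustrative check: a single wild value is rejected, the rest survive.
_example_bursts = np.array([4.0, 5.0, 6.0, 5.5, 4.5, 100.0])
assert list(reject_outliers(_example_bursts)) == [4.0, 5.0, 6.0, 5.5, 4.5]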

# Get sites
launch_site_request = requests.get(launch_site_url)
launch_sites = launch_site_request.json()

# Get binned flights
binned_data = s3.list_objects_v2(
    Bucket=binned_data_bucket,
    Prefix='launchsites/',
    Delimiter='/',
    MaxKeys=1000)
binned_data = binned_data['CommonPrefixes']

# Generate list of sites we have data for
eligable_launch_sites = []
for site in binned_data:
    site_id = site['Prefix']
    site_id = site_id.replace('launchsites/', '')
    site_id = site_id.replace('/', '')
    eligable_launch_sites.append(site_id)

# Start processing
print('Fetching {} launch sites!'.format(len(eligable_launch_sites)))

# Get list of flights from the last year for each site
eligable_site_data = {}
i = 0
for site in eligable_launch_sites:
    i += 1
    print('{}/{} - Fetching launch site #{}'.format(i, len(eligable_launch_sites), site))
    eligable_site_data[site] = {}
    eligable_site_data[site]['launches'] = []
    pages = paginator.paginate(Bucket=binned_data_bucket, Prefix='launchsites/{}/'.format(site))
    for page in pages:
        for obj in page['Contents']:
            date = check_flight_age(obj['Key'])
            if (now - date).days > 365:
                continue
            serial = get_flight_serial(obj['Key'])
            # Keys arrive in date order, so insert at the front (newest first)
            eligable_site_data[site]['launches'].insert(0, {'key': obj['Key'], 'date': date})

# Check if enough data for site, then summarise each flight
i = 0
for site in eligable_site_data:
    i += 1
    entries = len(eligable_site_data[site]['launches'])
    if entries < 30:
        print('{}/{} - Skipping launch site #{}'.format(i, len(eligable_launch_sites), site))
        continue
    print('{}/{} - Processing launch site #{}'.format(i, len(eligable_launch_sites), site))
    summary = eligable_site_data[site]['summary'] = {}
    types = summary['types'] = {}
    temp_serials = []
    # Iterate over a copy so duplicate-serial flights can be removed safely
    for flight in list(eligable_site_data[site]['launches']):
        try:
            data = get_flight_data(flight['key'])
            if data[0]['serial'] in temp_serials:
                eligable_site_data[site]['launches'].remove(flight)
                continue
            temp_serials.append(data[0]['serial'])
            flight['burst'] = check_flight_burst(data)
            flight['frequency'] = check_flight_frequency(data)
            flight['type'] = check_flight_type(data)
            days = (now - flight['date']).days
            flight['ascent_rate'] = check_flight_ascent(data)
            flight['descent_rate'] = check_flight_descent(data)
            flight['launch'] = check_flight_launch(data)
            # Only flights from the last 90 days count towards type statistics
            if flight['type'] is not None and days <= 90:
                if flight['type'] in types:
                    types[flight['type']]['count'] += 1
                else:
                    types[flight['type']] = {'count': 1}
                if flight['frequency'] is not None:
                    frequencies = types[flight['type']].setdefault('frequencies', {})
                    frequencies[flight['frequency']] = frequencies.get(flight['frequency'], 0) + 1
                if flight['launch'] is not None:
                    types[flight['type']].setdefault('times', []).append(flight['launch'])
        except Exception:
            # Skip flights with missing or malformed summary data
            pass

    # Find the most common sonde type seen at this site
    temp_count = -1
    for sonde_type in types:
        if types[sonde_type]['count'] > temp_count:
            summary['common_type'] = sonde_type
            temp_count = types[sonde_type]['count']

    # Collect burst/ascent/descent samples from flights of the common type
    bursts = np.empty((0))
    ascents = np.empty((0))
    descents = np.empty((0))
    for flight in eligable_site_data[site]['launches']:
        try:
            if flight['type'] != summary['common_type']:
                continue
            if flight['burst'] is not None:
                bursts = np.append(bursts, flight['burst'])
            if flight['ascent_rate'] is not None:
                ascents = np.append(ascents, flight['ascent_rate'])
            if flight['descent_rate'] is not None:
                descents = np.append(descents, flight['descent_rate'])
        except Exception:
            # Flights that failed processing above lack these keys; skip them
            pass

    # Build the rs_types list: each entry is a type code, optionally paired
    # with the dominant transmit frequency for that type
    summary['rs_types'] = []
    for sonde_type in types:
        if 'frequencies' in types[sonde_type]:
            frequencies = types[sonde_type]['frequencies']
            common_frequency = max(frequencies, key=frequencies.get)
            # Only record a frequency if >90% of the type's flights used it
            if frequencies[common_frequency] > types[sonde_type]['count'] * 0.9:
                types[sonde_type]['frequency'] = common_frequency
            types[sonde_type].pop('frequencies', None)
        if 'times' in types[sonde_type]:
            launch_times = check_launch_times(types[sonde_type]['times'])
            if len(launch_times) > 0:
                summary['times'] = launch_times
            types[sonde_type].pop('times', None)
        if 'frequency' in types[sonde_type]:
            summary['rs_types'].append([rs_types_lookup[sonde_type], str(types[sonde_type]['frequency'])])
        else:
            summary['rs_types'].append(rs_types_lookup[sonde_type])
    summary.pop('types', None)

    # Reject outliers, then publish statistics when more than 30 samples remain
    bursts = reject_outliers(bursts)
    ascents = reject_outliers(ascents)
    descents = reject_outliers(descents)
    summary.pop('common_type', None)
    if np.count_nonzero(bursts) > 30:
        # float()/int() keep the values JSON serializable
        summary['burst_altitude'] = round(float(np.mean(bursts)) / 100) * 100
        summary['burst_std'] = round(float(np.std(bursts)) / 10) * 10
        summary['burst_samples'] = int(np.count_nonzero(bursts))
    if np.count_nonzero(ascents) > 30:
        summary['ascent_rate'] = round(float(np.mean(ascents)), 1)
        summary['ascent_std'] = round(float(np.std(ascents)), 2)
        summary['ascent_samples'] = int(np.count_nonzero(ascents))
    if np.count_nonzero(descents) > 30:
        summary['descent_rate'] = round(float(np.mean(descents)), 1)
        summary['descent_std'] = round(float(np.std(descents)), 2)
        summary['descent_samples'] = int(np.count_nonzero(descents))

    print(json.dumps(launch_sites[site]))
    updated_site = launch_sites[site]
    # Merge new statistics into the existing site record, only overwriting
    # when the new estimate has a comparable sample count or tighter spread.
    # Membership tests guard against statistics that were not produced above.
    if summary['rs_types']:
        updated_site['rs_types'] = summary['rs_types']
    if 'ascent_rate' in summary:
        if ('ascent_rate' not in updated_site or
                'ascent_samples' not in updated_site or
                'ascent_std' not in updated_site or
                summary['ascent_samples'] >= (updated_site['ascent_samples'] - 10) or
                summary['ascent_std'] < updated_site['ascent_std']):
            updated_site['ascent_rate'] = summary['ascent_rate']
            updated_site['ascent_std'] = summary['ascent_std']
            updated_site['ascent_samples'] = summary['ascent_samples']
    if 'burst_altitude' in summary:
        if ('burst_altitude' not in updated_site or
                'burst_samples' not in updated_site or
                'burst_std' not in updated_site or
                summary['burst_samples'] >= (updated_site['burst_samples'] - 10) or
                summary['burst_std'] < updated_site['burst_std']):
            updated_site['burst_altitude'] = summary['burst_altitude']
            updated_site['burst_std'] = summary['burst_std']
            updated_site['burst_samples'] = summary['burst_samples']
    if 'descent_rate' in summary:
        if ('descent_rate' not in updated_site or
                'descent_samples' not in updated_site or
                'descent_std' not in updated_site or
                summary['descent_samples'] >= (updated_site['descent_samples'] - 10) or
                summary['descent_std'] < updated_site['descent_std']):
            updated_site['descent_rate'] = summary['descent_rate']
            updated_site['descent_std'] = summary['descent_std']
            updated_site['descent_samples'] = summary['descent_samples']
    print(json.dumps(updated_site))