Last active
September 19, 2023 12:14
-
-
Save alex-hhh/e1a4ffe3e861ad3fd31f6655a3a66b26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Update weather data for sessions in an ActivityLog2 database using data | |
from the Meteostat service. The script will find sessions that have | |
latitude/longitude coordinates but no weather data, and will attempt to fetch | |
weather info from the Meteostat service and store it attached to the each | |
session. Sessions that already have weather data will not be updated. | |
For ActivityLog2, see https://github.com/alex-hhh/ActivityLog2 | |
For Meteostat, see https://meteostat.net/en/ | |
To use it, you will need to install Python 3, then install the meteostat | |
package using "python3 -m pip install meteostat". You will also need to | |
update this script to point to your AL2 database, see the "USER CONFIGURATION | |
OPTIONS" section at the end of this script. | |
ActivityLog2 stores all data in a SQLite3 database and this script will | |
accesss the database file directly to fetch information about sessions and add | |
weather records to them. You can find the database schema file at: | |
https://github.com/alex-hhh/ActivityLog2/blob/master/sql/db-schema.sql | |
""" | |
from datetime import datetime, timedelta, timezone; | |
import meteostat as mto; # python3 -m pip install meteostat | |
import os | |
import pandas as pd; | |
import sqlite3; | |
import sys; | |
import time; | |
LOG_ERROR = 0; | |
LOG_WARNING = 1; | |
LOG_INFO = 2; | |
LOG_DETAIL = 3; | |
LOG_DEBUG = 4; | |
def log_level_2_name(level): | |
"""Convert a log level code into a name to be used for display by the | |
`log` function below | |
""" | |
if level == LOG_ERROR: | |
return "ERROR"; | |
elif level == LOG_WARNING: | |
return "WARNING"; | |
elif level == LOG_INFO: | |
return "INFO"; | |
elif level == LOG_DETAIL: | |
return "DETAIL"; | |
elif level == LOG_DEBUG: | |
return "DEBUG"; | |
else: | |
return "UNKNOWN LEVEL"; | |
def log(level, msg): | |
"""Log a message at the specified log level. | |
There is some control over what messages are logged and where the logs go | |
(to the console and/or to the file). See settings of the log function | |
below. | |
""" | |
# The log function puts a timestamp as the number of seconds since the | |
# program start -- this is more useful for this type of program than a | |
# timestamp. | |
if level > log.level: | |
return; # skip logging these messages | |
lvl = log_level_2_name(level); | |
duration = (datetime.now() - log.start_time).total_seconds(); | |
msg = f"[{duration:>8.1f}] {lvl} {msg}"; | |
if log.to_stdout: | |
print(msg); | |
if log.file_output != None: | |
log.file_output.write(msg); | |
log.file_output.write('\n'); | |
log.level = LOG_INFO; # drop log messages with lower priority than this one | |
log.to_stdout = True; # Whether to log to stdout | |
log.file_output = None; # File to log output to (initialized later) | |
log.start_time = datetime.now(); # Tell the log function when the program started | |
def fetch_sessions_without_weather_data(dbc): | |
"""Return the sessions with no weather data from the database cursor DBC. | |
We look for sessions which have a latitude/longitude point (cannot fetch | |
weather if we don't know the location), but no record SESSION_WEATHER. | |
Returns a list of sessions, for each row we have "session id", "timestamp" | |
(when the session started) and latitude/longitude location. | |
""" | |
dbc.execute(''' | |
select P.session_id, min(T.timestamp), T.position_lat, T.position_long | |
from A_TRACKPOINT T, | |
A_LENGTH L, | |
A_LAP P | |
where L.lap_id = P.id | |
and T.length_id = L.id | |
and T.position_lat is not null | |
and T.position_long is not null | |
and P.session_id not in (select session_id from SESSION_WEATHER) | |
group by P.session_id | |
order by P.session_id desc | |
'''); | |
return dbc.fetchall(); | |
## SQL query to insert a weather observation record. Used by | |
## `put_session_weather` | |
insert_session_weather_sql = ''' | |
insert into SESSION_WEATHER( | |
session_id, | |
wstation, | |
weather_status_id, | |
timestamp, | |
temperature, | |
dew_point, | |
humidity, | |
wind_speed, | |
wind_gusts, | |
wind_direction, | |
pressure, | |
position_lat, | |
position_long) | |
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) | |
'''; | |
def put_session_weather(dbc, session_id, wobs): | |
"""Store in the database cursor DBC the weather observation data in WOBS | |
for the sesion identified by SESSION_ID. | |
WOBS is a hash table containing weather parameters for a session, such as | |
temperature, humidity, etc. | |
WARNING: you will need to commit the database transaction for the changes | |
to take effect. This function does not commit the transaction. | |
""" | |
timestamp = wobs['timestamp']; # must exist | |
wstation = wobs['wstation']; # must exist | |
# All other weather data is optional and OK not to exist | |
weather_status_id = wobs['weather_status_id'] if 'weather_status_id' in wobs else None; | |
temperature = wobs['temperature'] if 'temperature' in wobs else None; | |
dew_point = wobs['dew_point'] if 'dew_point' in wobs else None; | |
humidity = wobs['humidity'] if 'humidity' in wobs else None; | |
wind_speed = wobs['wind_speed'] if 'wind_speed' in wobs else None; | |
wind_direction = wobs['wind_direction'] if 'wind_direction' in wobs else None; | |
wind_gusts = wobs['wind_gusts'] if 'wind_gusts' in wobs else None; | |
pressure = wobs['pressure'] if 'pressure' in wobs else None; | |
position_lat = wobs['position_lat'] if 'position_lat' in wobs else None; | |
position_long = wobs['position_long'] if 'position_long' in wobs else None; | |
dbc.execute( | |
insert_session_weather_sql, | |
(session_id, | |
wstation, | |
weather_status_id, | |
timestamp, | |
temperature, | |
dew_point, | |
humidity, | |
wind_speed, | |
wind_gusts, | |
wind_direction, | |
pressure, | |
position_lat, | |
position_long)); | |
def nearby_stations(lat, lon, count = 10): | |
"""Return the list of nearby stations around the position LAT/LON. At | |
most COUNT stations are returned in a Pandas data frame. | |
""" | |
return mto.Stations().nearby(lat, lon).fetch(count); | |
def hourly_observations(station_id, t, hours_around = 5): | |
"""Return a Pandas data frame for hourly observations for the station | |
STATION_ID. Observations are returned around the timestamp T, a datetime | |
object. | |
""" | |
delta = timedelta(hours = hours_around); | |
return mto.Hourly(station_id, t - delta, t + delta).fetch(); | |
# Map meteostat weather codes to AL2 weather codes. See bottom of this file | |
# for the weather code mappings. | |
coco_to_al2codes = { | |
1 : 0, 2 : 1, 3 : 22, 4 : 2, 5 : 8, 6 : 8, 7 : 16 , | |
8 : 3, 9 : 17, 10 : 3, 11 : 17, 12 : 20, 13 : 21, | |
14 : 18, 15 : 4, 16 : 19, 17 : 13, 18 : 13, 19 : 13, | |
20 : 21, 21 : 4, 22 : 19, 23 : 14, 24 : 12, 25 : 6, | |
26 : 6, 27 : 6}; | |
def coco2al2(code): | |
"""Return the AL2 weather code (weather_status_id) that corresponds to the | |
Meteostat weather CODE. Return None if the code cannot be mapped. | |
""" | |
try: | |
return coco_to_al2codes[int(code)]; | |
except: | |
return None; | |
def extract_observations(timestamp, observations): | |
"""Extract the closest weather data to TIMESTAMP (a unix timestamp) from | |
OBSERVATIONS, a Pandas data frame containing observations indexed by a | |
timestamp. Returns a dictionary of weather values to match an AL2 weather | |
record. | |
""" | |
# NOTE: Meteostat deals with timestamps in UTC format, but also uses | |
# "naive" date time objects. So we need to convert our timestamp into a | |
# UTC date time, than drop the timezone info. Yes, this is somewhat dumb. | |
ts = datetime.fromtimestamp(timestamp, tz = timezone.utc).replace(tzinfo = None); | |
obs_low = observations[observations.index < ts]; | |
obs_high = observations[observations.index > ts]; | |
timestamp_low = time.mktime(obs_low.index[-1].timetuple()); | |
timestamp_high = time.mktime(obs_high.index[0].timetuple()); | |
obs = obs_high; | |
obs_idx = 0; | |
if (timestamp - timestamp_low) < (timestamp_high - timestamp): | |
obs = obs_low; | |
obs_idx = -1; | |
# Meteostat wind speeds and gusts are in km/h, but Al2 stores them in | |
# meters/seconds, so we convert them here. | |
wspd = obs['wspd'][obs_idx]; | |
if wspd != None and not pd.isnull(wspd): | |
wspd = wspd / 3.6; | |
wpgt = obs['wpgt'][obs_idx]; | |
if wpgt != None and not pd.isnull(wpgt): | |
wpgt = wpgt / 3.6; | |
wobs = { | |
'timestamp': int(time.mktime(obs.index[-1].timetuple())), | |
'temperature' : obs['temp'][obs_idx], | |
'dew_point' : obs['dwpt'][obs_idx], | |
'humidity': obs['rhum'][obs_idx], | |
'wind_speed' : wspd, | |
'wind_direction' : obs['wdir'][obs_idx], | |
'wind_gusts' : wpgt, | |
'pressure' : obs['pres'][obs_idx], | |
'weather_status_id' : coco2al2(obs['coco'][obs_idx]) | |
}; | |
return wobs; | |
# Columns we want to print out for weather stations in | |
# `get_weather_observations` | |
print_columns = [ | |
'name', 'country', 'region','wmo', 'icao', 'latitude', | |
'longitude', 'elevation', 'timezone', 'distance']; | |
def get_weather_observations(session_id, timestamp, lat, lon, max_station_distance): | |
"""Return weather observations for the session SESSION_ID at TIMESTAMP. | |
The function will look for a weather station at most MAX_STATION_DISTANCE | |
(in meters) from the LAT/LON coordinate, assumed to be the start location | |
for the session identified by SESSION_ID. | |
If such a weather station is found, it will try to fetch observations | |
around TIMESTAMP, a UNIX timestamp, assumed to be the start time of the | |
session identified by SESSION_ID. | |
Returns a hash table with weather observations or None, if no such | |
observations are found. | |
""" | |
# NOTE: Meteostat deals with timestamps in UTC format, but also uses | |
# "naive" date time objects. So we need to convert our timestamp into a | |
# UTC date time, than drop the timezone info. Yes, this is somewhat dumb. | |
t = datetime.fromtimestamp(timestamp, tz = timezone.utc).replace(tzinfo = None); | |
log(LOG_DETAIL, f"Session {session_id} start location lat = {lat}, lon = {lon}"); | |
log(LOG_DETAIL, f"Will search weather data around {t} ({timestamp})"); | |
nearby = nearby_stations(lat, lon) | |
if len(nearby.index) == 0: | |
log(LOG_WARNING, f"No nearby weather stations found for session {session_id}"); | |
return None; | |
log(LOG_DEBUG, "Candidate weather stations:"); | |
log(LOG_DEBUG, nearby[print_columns]); | |
station_index = 0; | |
max_station_index = len(nearby.index); | |
while (station_index < max_station_index): | |
station_id = nearby.index[station_index]; | |
# NOTE: we could limit fetching data only from airport weather | |
# stations by looking for "not pd.isnull(nearby.icao[station_index])" | |
if nearby.distance[station_index] > max_station_distance: | |
log(LOG_WARNING, f"Station {station_id} ({nearby.name[station_index]}) is to far away, won't use it"); | |
station_index = station_index + 1; | |
continue; | |
log(LOG_DETAIL, f"Trying station {station_id}, {nearby.name[station_index]}"); | |
observations = hourly_observations(station_id, t); | |
if len(observations.index) == 0: | |
log(LOG_WARNING, f"Station {station_id} ({nearby.name[station_index]}) has no observations around {t} ({timestamp})"); | |
station_index = station_index + 1; | |
continue; | |
# Prepare "base" weather observations record with weather station | |
# info. | |
wobs = { | |
'wstation' : f"meteostat {station_id}", | |
'position_lat' : nearby['latitude'][0], | |
'position_long' : nearby['longitude'][0] | |
}; | |
log(LOG_DETAIL, f"Station {station_id} ({nearby.name[station_index]}) has observations around {t} ({timestamp})"); | |
log(LOG_DEBUG, observations); | |
# Get the observation closest to our timestamp (from the past) | |
weather_data = extract_observations(timestamp, observations); | |
# ... and add them to base | |
wobs.update(weather_data); | |
# ... remove None and Nan items from the weather observations. | |
clean_wobs = [(k, v) for (k, v) in wobs.items() | |
if v != None and not pd.isnull(v)]; | |
return dict(clean_wobs); # return when the first observations were found | |
log(LOG_WARNING, f"Could not find weather data for session {session_id}"); | |
return None; | |
def update_weather_for_sessions(dbc, activity_max_age, max_station_distance): | |
"""Update weather data for sessions in the databse DBC (a database cursor) | |
that don't already have weathre data. Only sessions whose start time is | |
less than ACTIVITY_MAX_AGE (in seconds) are considered. | |
MAX_STATION_DISTANCE defines the max distance in meters for a weather from | |
a session start location and it is passed to `get_weather_observations` | |
NOTE: weather data records are added to the database, but the database | |
transaction is not commited. To make changes permanent, the database | |
transaction needs to be commited using `connection.commit()`. | |
""" | |
candidates = fetch_sessions_without_weather_data(dbc); | |
cutoff = time.time() - activity_max_age; | |
for (session_id, timestamp, lat, lon) in candidates: | |
if timestamp < cutoff: | |
t = datetime.fromtimestamp(timestamp); | |
log(LOG_DETAIL, f"Skipping old session {session_id} from {t} ({timestamp})"); | |
continue; | |
wobs = get_weather_observations(session_id, timestamp, lat, lon, max_station_distance); | |
if wobs != None: | |
log(LOG_INFO, f"Storing weather data for session {session_id}"); | |
put_session_weather(dbc, session_id, wobs); | |
if __name__ == "__main__": | |
## USER CONFIGURATION OPTIONS | |
log.level = LOG_INFO; # logging level, can also use LOG_DETAIL or LOG_DEBUG | |
max_station_distance = 50 * 1000; # 50 km | |
activity_max_age = 365 * 24 * 60 * 60; # 1 year | |
db_file = "ActivityLog.db"; # database file | |
## NOTE that using sqlite3.connect() on a non-existent file will create a | |
## new database, but with no tables in it, creating a lot of confusing | |
## errors, so better check here if the file actualy exists. | |
if not os.path.exists(db_file): | |
log(LOG_ERROR, f"Cannot find database file at {db_file}"); | |
sys.exit(1); | |
log(LOG_INFO, f"Using database {db_file}"); | |
connection = sqlite3.connect(db_file); # open database | |
update_weather_for_sessions( | |
connection.cursor(), | |
activity_max_age, | |
max_station_distance); | |
connection.commit(); # commit transaction if everything went fine | |
connection.close(); # close connection | |
# Meteostat WeatherCondition codes (coco) mapping to AL2 weather codes, which | |
# are based on the FIT weather definitions: | |
# 1 Clear => 0 Clear | |
# 2 Fair => 1 Partly Cloudy | |
# 3 Cloudy => 22 Cloudy | |
# 4 Overcast => 2 Mostly Cloudy | |
# 5 Fog => 8 Fog | |
# 6 Freezing Fog => 8 Fog | |
# 7 Light Rain => 16 Light Rain | |
# 8 Rain => 3 Rain | |
# 9 Heavy Rain => 17 Heavy Rain | |
# 10 Freezing Rain => 3 Rain | |
# 11 Heavy Freezing Rain => 17 Heavy Rain | |
# 12 Sleet => 20 Light Rain Snow | |
# 13 Heavy Sleet => 21 Heavy Rain Snow | |
# 14 Light Snowfall => 18 Light Snow | |
# 15 Snowfall => 4 Snow | |
# 16 Heavy Snowfall => 19 Heavy Snow | |
# 17 Rain Shower => 13 Scattered Showers | |
# 18 Heavy Rain Shower => 13 Scattered Showers | |
# 19 Sleet Shower => 13 Scattered Showers | |
# 20 Heavy Sleet Shower => 21 Heavy Rain Snow | |
# 21 Snow Shower => 4 Snow | |
# 22 Heavy Snow Shower => 19 Heavy Snow | |
# 23 Lightning => 14 Scattered Thunderstorms | |
# 24 Hail => 12 Hail | |
# 25 Thunderstorm => 6 Thunderstorms | |
# 26 Heavy Thunderstorm => 6 Thunderstorms | |
# 27 Storm => 6 Thunderstorms | |
# All AL2 Weather Condition Codes, based on FIT weather records definitions: | |
# 0 Clear | |
# 1 Partly Cloudy | |
# 2 Mostly Cloudy | |
# 3 Rain | |
# 4 Snow | |
# 5 Windy | |
# 6 Thunderstorms | |
# 7 Wintry Mix | |
# 8 Fog | |
# 11 Hazy | |
# 12 Hail | |
# 13 Scattered Showers | |
# 14 Scattered Thunderstorms | |
# 15 Unknown Precipitation | |
# 16 Light Rain | |
# 17 Heavy Rain | |
# 18 Light Snow | |
# 19 Heavy Snow | |
# 20 Light Rain Snow | |
# 21 Heavy Rain Snow | |
# 22 Cloudy |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment