Skip to content

Instantly share code, notes, and snippets.

@AyrtonB
Created May 31, 2022 11:09
Show Gist options
  • Save AyrtonB/e863eb347577045d3716e3483b39adf7 to your computer and use it in GitHub Desktop.
Save AyrtonB/e863eb347577045d3716e3483b39adf7 to your computer and use it in GitHub Desktop.
This gist loads the Electric Insights files found in https://github.com/AyrtonB/Electric-Insights and cleans the onshore wind series, which often contains missing data.
import pandas as pd
from typing import Optional
def clean_onshore_s(
    s_onshore: pd.Series,
    delta_theshold_quantile: float = 0.96
) -> pd.Series:
    """Replace abrupt dips in an onshore wind series with interpolated values.

    The step-to-step change of the series is computed, and the given quantile
    of its absolute value is used as the threshold for an "abnormal" step.
    Two kinds of samples are discarded:

    * a sample reached by a drop larger than the threshold, and
    * a sample immediately followed by a rise larger than the threshold.

    The discarded samples (and any values already missing in the input) are
    then filled with ``Series.interpolate()`` using its default settings.

    Args:
        s_onshore: Series to clean. Its index must be unique because the
            result is reindexed back onto it.
        delta_theshold_quantile: Quantile (0-1) of the absolute step size
            above which a step is treated as abnormal.

    Returns:
        A series on the same index as ``s_onshore`` with the flagged samples
        replaced by interpolated values.
    """
    step = s_onshore.diff()
    threshold = step.abs().quantile(delta_theshold_quantile)

    # Sample sits at the bottom of an abnormally large drop.
    after_big_drop = step < -threshold

    # Sample sits just before an abnormally large rise. The last sample has
    # no successor, so it is explicitly marked False; this also keeps the
    # mask boolean instead of upcasting to object dtype with a trailing NaN.
    before_big_rise = (step > threshold).shift(-1, fill_value=False)

    keep = ~(after_big_drop | before_big_rise)

    # Drop the flagged samples, restore the original index (leaving gaps),
    # then fill those gaps by interpolation.
    return s_onshore[keep].reindex(s_onshore.index).interpolate()
def load_ei_df(
    ei_fp: str,
    delta_theshold_quantile: Optional[float] = 0.96
) -> pd.DataFrame:
    """Read an Electric Insights CSV into a datetime-indexed DataFrame.

    The ``local_datetime`` column is parsed as UTC-aware timestamps and used
    as the index; rows whose timestamp repeats an earlier one are dropped.
    Unless ``delta_theshold_quantile`` is ``None``, the ``wind_onshore``
    column is additionally passed through ``clean_onshore_s``.

    Args:
        ei_fp: Location of the CSV file, handed straight to ``pd.read_csv``.
        delta_theshold_quantile: Quantile forwarded to ``clean_onshore_s``;
            pass ``None`` to skip cleaning the onshore wind column.

    Returns:
        The loaded (and optionally cleaned) DataFrame.
    """
    frame = (
        pd.read_csv(ei_fp)
        .assign(local_datetime=lambda raw: pd.to_datetime(raw['local_datetime'], utc=True))
        .set_index('local_datetime')
    )

    # Keep only the first row seen for each timestamp.
    is_first_occurrence = ~frame.index.duplicated()
    frame = frame[is_first_occurrence]

    if delta_theshold_quantile is None:
        return frame

    cleaned_onshore = clean_onshore_s(frame['wind_onshore'], delta_theshold_quantile)
    return frame.assign(wind_onshore=cleaned_onshore)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment