Created
May 31, 2022 11:09
-
-
Save AyrtonB/e863eb347577045d3716e3483b39adf7 to your computer and use it in GitHub Desktop.
This gist loads the Electric Insights files found in https://github.com/AyrtonB/Electric-Insights and cleans the onshore wind data, which often has missing values.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from typing import Optional | |
def clean_onshore_s(
    s_onshore: pd.Series,
    delta_theshold_quantile: float = 0.96
) -> pd.Series:
    """Drop points bordering abnormally large jumps and interpolate over them.

    The step-to-step change of the series is compared against a threshold
    taken as the ``delta_theshold_quantile`` quantile of the absolute
    changes. Two kinds of points are discarded:

    * a point whose value fell by more than the threshold relative to the
      previous point, and
    * a point that is followed by a rise of more than the threshold.

    The discarded positions are then re-created on the original index and
    filled with ``Series.interpolate()`` using its default settings.

    Args:
        s_onshore: Series of values to clean.
        delta_theshold_quantile: Quantile (0-1) of the absolute differences
            used as the jump threshold.

    Returns:
        A series on the same index as ``s_onshore`` with the flagged points
        replaced by interpolated values.
    """
    s_onshore_diff = s_onshore.diff()
    delta_theshold_abs_value = s_onshore_diff.abs().quantile(delta_theshold_quantile)

    # Flag the point that a large fall lands on.
    s_drop_remove = s_onshore_diff < -delta_theshold_abs_value

    # Flag the point just before a large rise. The final position has no
    # successor, so it is filled with False explicitly; without fill_value
    # the shift inserts NaN and upcasts the boolean mask to object dtype.
    s_increase_remove = (s_onshore_diff > delta_theshold_abs_value).shift(-1, fill_value=False)

    s_keep_bool = ~(s_drop_remove | s_increase_remove)

    # Reindexing restores the removed positions as NaN so they can be filled.
    s_onshore_cleaned = s_onshore[s_keep_bool].reindex(s_onshore.index).interpolate()
    return s_onshore_cleaned
def load_ei_df(
    ei_fp: str,
    delta_theshold_quantile: Optional[float] = 0.96
) -> pd.DataFrame:
    """Read an Electric Insights CSV into a datetime-indexed dataframe.

    The ``local_datetime`` column is parsed as UTC-aware timestamps and used
    as the index; rows whose timestamp repeats an earlier one are dropped.
    Unless ``delta_theshold_quantile`` is ``None``, the ``wind_onshore``
    column is replaced by the output of ``clean_onshore_s``.

    Args:
        ei_fp: Path of the CSV file to read.
        delta_theshold_quantile: Quantile forwarded to ``clean_onshore_s``;
            pass ``None`` to skip the cleaning step.

    Returns:
        The loaded dataframe indexed by ``local_datetime``.
    """
    df_raw = pd.read_csv(ei_fp)
    df_raw['local_datetime'] = pd.to_datetime(df_raw['local_datetime'], utc=True)

    df_indexed = df_raw.set_index('local_datetime')

    # Keep only the first row for each timestamp.
    is_repeat = df_indexed.index.duplicated()
    df_unique = df_indexed[~is_repeat]

    if delta_theshold_quantile is None:
        return df_unique

    s_cleaned = clean_onshore_s(df_unique['wind_onshore'], delta_theshold_quantile)
    return df_unique.assign(wind_onshore=s_cleaned)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment