Reference: https://hydra.cc/docs/patterns/configuring_experiments/
defaults:
- db: mysql
- server: apache
## custom function
def custom_function_example(x, y):
    """Return ``x + y``.

    Works for any pair of operands that support the ``+`` operator
    (numbers, strings, lists, array-likes, ...).

    x -- Left operand.
    y -- Right operand.
    """
    return x + y
## apply a custom function over a rolling window
def apply_rolling_function(df, window_size, func, column1, column2, verbose = False): | |
# validate arguments | |
assert column1 in df.columns.tolist() | |
assert column2 in df.columns.tolist() | |
# initialize | |
result = [] |
## mapplot: plot a z variable as a function of x/y variables, all of them columns of a df
def mapplot(df:pd.DataFrame, c_x:str, c_y:str, c_z:str, title:str = '', c_map:str = "rainbow"): | |
# validate arguments | |
assert c_x in df.columns.tolist() | |
assert c_y in df.columns.tolist() | |
assert c_z in df.columns.tolist() | |
# initialize | |
import matplotlib.pyplot as plt | |
# collect data | |
x = df[c_x].values |
# write multiple pandas DataFrames to one Excel file, one sheet per table
# (path_output, df1, df2 and df3 are expected to be defined before this point;
# the 'xlsxwriter' engine requires the xlsxwriter package to be installed)
with pd.ExcelWriter(path_output, engine='xlsxwriter') as writer:
    # every to_excel call targets the same writer, so all sheets end up in one workbook
    df1.to_excel(writer, sheet_name='sheet1')
    df2.to_excel(writer, sheet_name='sheet2')
    df3.to_excel(writer, sheet_name='sheet3')
# leaving the with-block saves and closes the workbook
import pandas as pd | |
from sklearn.preprocessing import FunctionTransformer | |
from sklearn.pipeline import Pipeline | |
# example | |
from sklearn.linear_model import LogisticRegression | |
# X, y | |
def get_dummies_size(df):
    """One-hot encode the ``size`` column of a dataframe.

    df -- Dataframe to be processed; it must contain a ``size`` column.

    Returns a new dataframe in which ``size`` is replaced by one indicator
    column per distinct value (named ``size_<value>``); the other columns
    are kept as they are.
    """
    return pd.get_dummies(df, columns=['size'])
import pandas as pd | |
import numpy as np | |
from sklearn.pipeline import Pipeline | |
from sklearn.compose import ColumnTransformer | |
# example models and preprocessors | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.impute import SimpleImputer | |
from sklearn.linear_model import LogisticRegression | |
# X, y |
Reference: https://hydra.cc/docs/patterns/configuring_experiments/
defaults:
- db: mysql
- server: apache
class Credentials:
    """Hold a user name and a password.

    user -- User name (defaults to the placeholder "user").
    password -- Password (defaults to the placeholder "password").

    Calling the constructor without arguments yields the placeholder
    values, so subclasses that call ``super().__init__()`` keep working.
    """

    def __init__(self, user="user", password="password"):
        self.user = user
        self.password = password
class Service(Credentials):
    """Example subclass that inherits its attributes from Credentials."""

    def __init__(self):
        # run the parent initializer so the attributes it sets
        # (user, password) exist on this instance as well
        super().__init__()
import pandas as pd | |
## unstack a timeseries target variable according to a categorical reference column | |
def unstack_ts_according_to_reference(df:pd.DataFrame, c_dt:str, c_cat_reference:str, c_target_variable:str)->pd.DataFrame: | |
""" | |
Unstack a timeseries target variable according to a categorical reference column. | |
df -- Dataframe to be processed. | |
c_dt -- Temporal column. | |
c_cat_reference -- Categorical column to be used as reference to stack the target variable. | |
c_target_variable -- Num / Cat column to be stacked. |
from scipy.stats import linregress | |
# estimate linear regression y = Ax + B
# linregress returns, in this order: slope (A), intercept (B), correlation
# coefficient (r_value), p-value of the slope (p_value) and standard error
# of the slope (std_err); x and y must be defined before this point
A, B, r_value, p_value, std_err = linregress(x, y)
# original column | |
In [15]: df["timedelta_column"] | |
Out[15]: | |
0 1 days 00:00:00 | |
1 3 days 02:00:00 | |
2 5 days 04:00:00 | |
3 7 days 06:00:00 | |
4 9 days 08:00:00 | |
5 11 days 10:00:00 | |
dtype: timedelta64[ns] |