Juan Quintana jmquintana79

Reference: https://hydra.cc/docs/patterns/configuring_experiments/

config.yaml

defaults:
  - db: mysql
  - server: apache

	## custom function
	def custom_function_example(x, y):
		"""Return ``x + y``.

		Minimal example of a custom two-argument function.

		x -- Left operand.
		y -- Right operand; must support ``x + y``.
		"""
		return x + y
	## apply rolling on a custom function
	def apply_rolling_function(df, window_size, func, column1, column2, verbose = False):
	# validate arguments
	assert column1 in df.columns.tolist()
	assert column2 in df.columns.tolist()
	# initialize
	result = []

	## map plot of a variable z against variables x and y (all three are columns of df)
	def mapplot(df:pd.DataFrame, c_x:str, c_y:str, c_z:str, title:str = '', c_map:str = "rainbow"):
	# validate arguments
	assert c_x in df.columns.tolist()
	assert c_y in df.columns.tolist()
	assert c_z in df.columns.tolist()
	# initialize
	import matplotlib.pyplot as plt
	# collect data
	x = df[c_x].values


	# multiple pandas df tables to one excel file on multiple sheets
	with pd.ExcelWriter(path_output, engine='xlsxwriter') as writer:
		# the writes must happen inside the `with` block so they go through
		# the open writer; each dataframe is written to its own sheet
		df1.to_excel(writer, sheet_name='sheet1')
		df2.to_excel(writer, sheet_name='sheet2')
		df3.to_excel(writer, sheet_name='sheet3')

	import pandas as pd
	from sklearn.preprocessing import FunctionTransformer
	from sklearn.pipeline import Pipeline
	# example
	from sklearn.linear_model import LogisticRegression

	# X, y

	def get_dummies_size(df):
		"""Return ``df`` with its ``size`` column replaced by dummy columns.

		df -- Dataframe to be processed; passed to ``pd.get_dummies`` with
		      ``columns=['size']``.
		return -- Result of ``pd.get_dummies`` for that call.
		"""
		return pd.get_dummies(df, columns=['size'])

	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	# example models and preprocessors
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import LogisticRegression

	# X, y

	import pandas as pd

	## unstack a timeseries target variable according to a categorical reference column
	def unstack_ts_according_to_reference(df:pd.DataFrame, c_dt:str, c_cat_reference:str, c_target_variable:str)->pd.DataFrame:
	"""
	Unstack a timeseries target variable according to a categorical reference column.
	df -- Dataframe to be processed.
	c_dt -- Temporal column.
	c_cat_reference -- Categorical column to be used as reference to stack the target variable.
	c_target_variable -- Num / Cat column to be stacked.

	from scipy.stats import linregress
	# estimate linear regression y = Ax + B (A: slope, B: intercept);
	# linregress's result is unpacked into five values, the last three being
	# bound to r_value, p_value and std_err
	A, B, r_value, p_value, std_err = linregress(x, y)