Kedro question on Stack Overflow: catalog, nodes, parameters, and pipeline for training an autoencoder with Kedro and TensorFlow/Keras.
catalog.yml
train_x:
  type: pandas.CSVDataSet
  filepath: data/01_raw/x_train.csv

test_x:
  type: pandas.CSVDataSet
  filepath: data/01_raw/x_test.csv

autoencoder_scaler:
  type: pickle.PickleDataSet
  filepath: data/06_models/autoencoder_scaler.pkl

pre_train_autoencoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/pre_train_autoencoder.h5

pre_train_encoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/pre_train_encoder.h5

pre_train_decoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/pre_train_decoder.h5

autoencoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/autoencoder.h5

encoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/encoder.h5

decoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/decoder.h5
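As a quick way to verify these entries resolve, the catalog can be loaded directly in a Python session. A minimal sketch, assuming the file above is saved as conf/base/catalog.yml and the session runs from the project root:

# Minimal sketch: build a DataCatalog from the YAML above and load one entry.
# Assumes conf/base/catalog.yml exists and the CSVs are present under data/01_raw/.
import yaml

from kedro.io import DataCatalog

with open("conf/base/catalog.yml") as f:
    catalog = DataCatalog.from_config(yaml.safe_load(f))

train_x = catalog.load("train_x")  # pandas DataFrame read from data/01_raw/x_train.csv
print(train_x.shape)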
nodes.py
import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
def build_models(
        data: pd.DataFrame,
        n_hidden_layers: int,
        latent_space_size: int,
        regularization_strength: float,
        seed: int) -> Dict[str, keras.Model]:
    assert n_hidden_layers >= 1, "There must be at least 1 hidden layer for the autoencoder"
    n_features = data.shape[1]
    tf.random.set_seed(seed)

    # Encoder: n_hidden_layers Dense+LeakyReLU blocks, then the latent projection
    input_layer = keras.Input(shape=(n_features,))
    hidden = input_layer
    for _ in range(n_hidden_layers):
        hidden = keras.layers.Dense(n_features, kernel_regularizer=keras.regularizers.l1(regularization_strength))(hidden)
        hidden = keras.layers.LeakyReLU()(hidden)
    encoded = keras.layers.Dense(latent_space_size, activation="sigmoid")(hidden)

    # Decoder layers are kept as objects so the same weights back both the
    # autoencoder and the standalone decoder model
    decoder_layers = []
    for _ in range(n_hidden_layers):
        decoder_layers.append(keras.layers.Dense(n_features, kernel_regularizer=keras.regularizers.l1(regularization_strength)))
        decoder_layers.append(keras.layers.LeakyReLU())
    decoder_layers.append(keras.layers.Dense(n_features, activation="sigmoid"))

    decoded = encoded
    for layer in decoder_layers:
        decoded = layer(decoded)

    # Defines the neural networks
    autoencoder = keras.Model(inputs=input_layer, outputs=decoded)
    encoder = keras.Model(inputs=input_layer, outputs=encoded)

    # The standalone decoder maps a latent vector through the shared decoding layers
    latent_input = keras.Input(shape=(latent_space_size,))
    decoder_output = latent_input
    for layer in decoder_layers:
        decoder_output = layer(decoder_output)
    decoder = keras.Model(inputs=latent_input, outputs=decoder_output)

    autoencoder.compile(optimizer="adam", loss="mean_absolute_error")
    return dict(
        pre_train_autoencoder=autoencoder,
        pre_train_encoder=encoder,
        pre_train_decoder=decoder,
    )
def fit_scaler(data: pd.DataFrame) -> MinMaxScaler:
    scaler = MinMaxScaler()
    scaler.fit(data)
    return scaler

def transform_scaler(scaler: MinMaxScaler, data: pd.DataFrame) -> np.ndarray:
    return scaler.transform(data)
def train_autoencoder(
        train_x: pd.DataFrame, test_x: pd.DataFrame,
        autoencoder: keras.Model,  # encoder: keras.Model, decoder: keras.Model,
        epochs: int, batch_size: int, seed: int) -> Dict[str, Any]:
    tf.random.set_seed(seed)
    callbacks = [
        keras.callbacks.History(),
        keras.callbacks.EarlyStopping(patience=3),
    ]
    logging.info(train_x.shape)
    logging.info(test_x.shape)
    # The autoencoder reconstructs its own input, so the data serves as both x and y
    history = autoencoder.fit(
        train_x, train_x,
        validation_data=(test_x, test_x),
        callbacks=callbacks,
        epochs=epochs,
        batch_size=batch_size,
    )
    return dict(
        autoencoder=autoencoder,
        autoencoder_history=history.history,
    )
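These nodes can also be smoke-tested outside Kedro. A minimal sketch with random data; the shapes are illustrative only, and the hyperparameters mirror parameters.yml below:

# Hypothetical smoke test of the nodes above, outside any Kedro run.
import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.rand(256, 30))
models = build_models(data, n_hidden_layers=3, latent_space_size=15,
                      regularization_strength=0.001, seed=200)
scaler = fit_scaler(data)
scaled = transform_scaler(scaler, data)
result = train_autoencoder(scaled[:200], scaled[200:],
                           models["pre_train_autoencoder"],
                           epochs=2, batch_size=32, seed=200)
print(result["autoencoder_history"]["loss"])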
parameters.yml
seed: 200

# Autoencoder
autoencoder_n_hidden_layers: 3
autoencoder_latent_space_size: 15
autoencoder_epochs: 100
autoencoder_batch_size: 32
autoencoder_regularization_strength: 0.001
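Each key here reaches node functions individually as "params:<key>", as pipeline.py does below. Kedro also exposes the whole dictionary through the special "parameters" input; a sketch with a hypothetical logging node:

# Hypothetical node showing the alternative to individual "params:<key>" inputs:
# the special "parameters" input hands the entire dictionary to the function.
from kedro.pipeline import node

def log_config(parameters: dict) -> None:
    print(parameters["seed"], parameters["autoencoder_epochs"])

config_node = node(log_config, inputs="parameters", outputs=None, name="log-config")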
pipeline.py
from kedro.pipeline import Pipeline, node

from .nodes import build_models, fit_scaler, train_autoencoder, transform_scaler

def train_autoencoder_pipeline() -> Pipeline:
    return Pipeline([
        # Build neural networks
        node(
            build_models,
            inputs=[
                "train_x",
                "params:autoencoder_n_hidden_layers",
                "params:autoencoder_latent_space_size",
                "params:autoencoder_regularization_strength",
                "params:seed",
            ],
            outputs=dict(
                pre_train_autoencoder="pre_train_autoencoder",
                pre_train_encoder="pre_train_encoder",
                pre_train_decoder="pre_train_decoder",
            ),
            name="autoencoder-create-models",
        ),
        # Scale features
        node(fit_scaler, inputs="train_x", outputs="autoencoder_scaler", name="autoencoder-fit-scaler"),
        node(transform_scaler, inputs=["autoencoder_scaler", "train_x"], outputs="autoencoder_scaled_train_x", name="autoencoder-scale-train"),
        node(transform_scaler, inputs=["autoencoder_scaler", "test_x"], outputs="autoencoder_scaled_test_x", name="autoencoder-scale-test"),
        # Train autoencoder
        node(
            train_autoencoder,
            inputs=[
                "autoencoder_scaled_train_x",
                "autoencoder_scaled_test_x",
                "pre_train_autoencoder",
                # "pre_train_encoder",  # removed since it's unused
                # "pre_train_decoder",  # removed since it's unused
                "params:autoencoder_epochs",
                "params:autoencoder_batch_size",
                "params:seed",
            ],
            outputs=dict(
                autoencoder="autoencoder",
                # encoder="encoder",  # removed since it's unused
                # decoder="decoder",  # removed since it's unused
                autoencoder_history="autoencoder_history",
            ),
            name="autoencoder-train-model",
        ),
    ])
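To make this runnable with "kedro run", the pipeline still has to be registered. A minimal sketch of a pipeline_registry.py, where "my_project" and the module path are placeholders for the real package layout (the exact registry style depends on the Kedro version):

# Hypothetical pipeline_registry.py; "my_project" is a placeholder package name.
from typing import Dict

from kedro.pipeline import Pipeline

from my_project.pipelines.autoencoder.pipeline import train_autoencoder_pipeline

def register_pipelines() -> Dict[str, Pipeline]:
    autoencoder = train_autoencoder_pipeline()
    return {"train_autoencoder": autoencoder, "__default__": autoencoder}

With that in place, "kedro run --pipeline train_autoencoder" executes the five nodes in dependency order.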