Skip to content

Instantly share code, notes, and snippets.

@reefwing
Created July 2, 2024 07:44
Show Gist options
  • Save reefwing/2fcfbaa6f83786f50cb02a369e0f10e4 to your computer and use it in GitHub Desktop.
Initial hyperparameter settings for our battery SOC ML model
# Copyright (c) 2024 David Such
#
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import joblib, os, time
# Start the timer for the complete script
start_time = time.time()
# Define the file paths
preprocessed_folder = os.path.expanduser("~/Documents/GitHub/Embedded-AI/data/LGHG2@n10C_to_25degC/Preprocessed")
model_folder = os.path.expanduser("~/Documents/GitHub/Embedded-AI/data/LGHG2@n10C_to_25degC/Model")
os.makedirs(model_folder, exist_ok=True)
# Define the training data file path (preprocessed)
train_file = os.path.join(preprocessed_folder, 'resampled_training_data.csv')
# Load the training data
train_df = pd.read_csv(train_file)
# Extract features and target variable
X_train = train_df[['Voltage', 'Current', 'Temperature', 'Average Voltage', 'Average Current']]
y_train = train_df['SOC']
# Define the GPR model with initial kernel
kernel = C(0.01, (0.001, 0.1)) * RBF(0.5, (0.01, 1.0))
gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, random_state=42)
# Create a pipeline with standardization and GPR
pipeline = Pipeline([
('scaler', StandardScaler()),
('gpr', gpr)
])
# Define the hyperparameter grid to optimize
param_grid = {
'gpr__kernel': [
C(0.01, (0.001, 0.1)) * RBF(0.5, (0.01, 1.0)),
C(0.01, (0.001, 0.1)) * RBF(0.25, (0.01, 1.0)),
C(0.01, (0.001, 0.1)) * RBF(1.0, (0.01, 1.0))
]
}
# Set up the grid search with cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, verbose=2)
# Start the timer for the grid search
grid_search_start_time = time.time()
# Fit the model
grid_search.fit(X_train, y_train)
# Print the elapsed time for grid search
grid_search_elapsed_time = time.time() - grid_search_start_time
grid_search_minutes, grid_search_seconds = divmod(grid_search_elapsed_time, 60)
print(f"Grid search completed in {int(grid_search_minutes)} minutes and {grid_search_seconds:.2f} seconds")
# Output the best parameters and the corresponding score
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_}")
# Save the best model
model_file = os.path.join(model_folder, 'best_gpr_model.pkl')
joblib.dump(grid_search.best_estimator_, model_file)
# Print the total elapsed time for the script
total_elapsed_time = time.time() - start_time
total_minutes, total_seconds = divmod(total_elapsed_time, 60)
print(f"Total script execution time: {int(total_minutes)} minutes and {total_seconds:.2f} seconds")
# Load the best model (example of how to load it later)
# best_gpr_model = joblib.load(model_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment