Last active
November 16, 2023 18:46
-
-
Save smothiki/e94b5b1f4873030aff2fbd40e2811f13 to your computer and use it in GitHub Desktop.
test example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
!pip3 install scikit-learn pandas numpy | |
import os | |
import warnings | |
import sys | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import ElasticNet | |
from urllib.parse import urlparse | |
import mlflow | |
import mlflow.sklearn | |
import logging | |
# Configure the root logger to emit warnings and above. WARNING is the
# documented level name; logging.WARN is only a backward-compatibility alias
# for the same numeric level.
logging.basicConfig(level=logging.WARNING)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def eval_metrics(actual, pred):
    """Score predictions against the true values.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values to compare against ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)
# Script entry point: train an ElasticNet model on the red-wine quality data
# set, print and log its evaluation metrics to MLflow, and register the fitted
# model under the name "testmodel".
if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    # Seed NumPy's global RNG; train_test_split below is called without an
    # explicit random_state.
    np.random.seed(40)

    # Read the wine-quality csv file from the URL
    csv_url = (
        "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
    )
    try:
        data = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        logger.error(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e
        )
        # Nothing below can run without the data. Re-raise so the failure
        # surfaces as the real download/parse error instead of a later
        # NameError on the undefined `data`.
        raise

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # ElasticNet hyperparameters; both are logged as run parameters below.
    alpha = 0.5
    l1_ratio = 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate on the held-out test split.
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha={:f}, l1_ratio={:f}):".format(alpha, l1_ratio))
        print(" RMSE: %s" % rmse)
        print(" MAE: %s" % mae)
        print(" R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        # Log the fitted model as a run artifact and register it as "testmodel".
        mlflow.sklearn.log_model(lr, "model", registered_model_name="testmodel")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"dataframe_split": { | |
"columns": ["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", "pH", "sulphates", "alcohol"], | |
"data": [ | |
[6.2, 0.66, 0.48, 1.2, 0.029, 29.1, 75.2, 0.98, 3.33, 0.39, 12.8] | |
] | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run this, first install the dependencies: pip3 install mlflow scikit-learn pandas numpy