Last active
September 21, 2023 23:52
-
-
Save smothiki/263a1264a68a4534c5bb0ade89cd828c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import sys | |
import warnings | |
from urllib.parse import urlparse | |
import numpy as np | |
import pandas as pd | |
from sklearn.linear_model import ElasticNet | |
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score | |
from sklearn.model_selection import train_test_split | |
import mlflow | |
import mlflow.sklearn | |
from mlflow.models import infer_signature | |
import onnx | |
from skl2onnx import convert_sklearn | |
from skl2onnx.common.data_types import FloatTensorType | |
# Configure the root logger at WARNING level; `logger` is this module's
# own named logger, used for error reporting below.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)
def convert_to_onnx(model, data):
    """Convert a fitted scikit-learn model to ONNX and tag the active MLflow run.

    Args:
        model: Fitted scikit-learn estimator to convert.
        data: Sample of the model's input features. Only ``data.shape[1]``
            is read, to size the ONNX input tensor. Objects without a 2-D
            ``shape`` fall back to the previous hard-coded width of 11.

    Returns:
        The ONNX model object produced by ``convert_sklearn``.
    """
    # Derive the feature count from the data instead of hard-coding it, so
    # the exported input signature always matches what the model was fit on.
    shape = getattr(data, "shape", None)
    n_features = shape[1] if shape is not None and len(shape) > 1 else 11

    # `None` in the first dimension leaves the batch size dynamic.
    initial_type = [("float_input", FloatTensorType([None, n_features]))]
    onnx_model = convert_sklearn(model, initial_types=initial_type)
    print("onnx_model.type:", type(onnx_model))

    # Record the onnx package version on the run for reproducibility.
    mlflow.set_tag("onnx_version", onnx.__version__)
    return onnx_model
def eval_metrics(actual, pred):
    """Compute regression metrics for predictions against ground truth.

    Args:
        actual: Ground-truth target values.
        pred: Predicted target values.

    Returns:
        A ``(rmse, mae, r2)`` tuple.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
# Script entry point: train an ElasticNet on the wine-quality data set, log
# parameters, metrics and the model to MLflow, then log an ONNX export of it.
# Optional CLI arguments: argv[1] = alpha, argv[2] = l1_ratio (both default 0.5).
if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file from the URL
    csv_url = (
        "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
    )
    try:
        data = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e
        )
        # Nothing below can run without the data set; re-raise instead of
        # falling through to a confusing NameError on `data`.
        raise

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print(f"Elasticnet model (alpha={alpha:f}, l1_ratio={l1_ratio:f}):")
        print(f" RMSE: {rmse}")
        print(f" MAE: {mae}")
        print(f" R2: {r2}")

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        # The signature is inferred from the training inputs and the
        # model's predictions on them.
        predictions = lr.predict(train_x)
        signature = infer_signature(train_x, predictions)
        mlflow.sklearn.log_model(lr, "model", signature=signature)

        # Also log an ONNX export of the same model under "onnx-model".
        onnx_model = convert_to_onnx(lr, test_x)
        mlflow.onnx.log_model(onnx_model, "onnx-model")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip3 install scikit-learn pandas numpy onnx==1.13.1 onnxruntime skl2onnx | |
import logging

import mlflow
import mlflow.onnx
import numpy as np
import onnx
import onnxruntime as rt
import pandas as pd
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from sklearn.model_selection import train_test_split
def score(model, data_ndarray):
    """Run an ONNX model on input data with onnxruntime's CPU provider.

    Args:
        model: ONNX model object exposing ``SerializeToString()``.
        data_ndarray: Feature values, either a single sample as a 1-D
            sequence or a 2-D batch of rows.

    Returns:
        The list of outputs returned by ``InferenceSession.run``.
    """
    sess = rt.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
    model_input = sess.get_inputs()[0]
    print(model_input)

    # onnxruntime rejects inputs whose dtype differs from the declared one
    # (e.g. NumPy's default float64 against a float32 input), so cast when
    # the session declares a float type. Other input types pass through.
    float_dtypes = {"tensor(float)": np.float32, "tensor(double)": np.float64}
    batch = np.asarray(data_ndarray, dtype=float_dtypes.get(model_input.type))

    # A single sample given as a flat vector becomes a one-row batch.
    # NOTE(review): assumes the model input is 2-D (batch, features) — confirm.
    if batch.ndim == 1:
        batch = batch.reshape(1, -1)

    return sess.run(None, {model_input.name: batch})
# Scoring script: load the logged ONNX model from MLflow and run it on one
# hand-written wine sample.
logger = logging.getLogger(__name__)

# Local artifact path of an ONNX model; not used below (the model is loaded
# through the run URI instead).
logged_model = '/home/cdsw/.experiments/2hrm-o9a9-1qwd-qb17/9l7r-lz3a-tast-9pfy/artifacts/onnx-model'
model = mlflow.onnx.load_model("runs:/11yt-wh5d-d36y-13hy/onnx-model")

# Load the wine-quality data set to inspect one held-out row.
csv_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception as e:
    logger.exception("Unable to download training & test CSV, check your internet connection. Error: %s", e)
    # Re-raise: continuing would only fail later with a NameError on `data`.
    raise

# test_size=1 holds out exactly one row.
train, test = train_test_split(data, test_size=1)
print(test.shape[0])
print(test[:1])

test_x = test.drop(["quality"], axis=1)
print(test_x)

# One sample of 11 feature values, shaped as a single-row float32 batch so
# it matches a 2-D float tensor model input.
sample = np.array(
    [[6.2, 0.66, 0.48, 1.2, 0.029, 29, 75, 0.98, 3.33, 0.39, 12.8]],
    dtype=np.float32,
)
score(model, sample)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment