202101-blog-article-fairlearn
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
# Load the census dataset
data = fetch_openml(data_id=1590, as_frame=True)
# Separate out sensitive features such as sex and race and exclude them from model training
X_raw = data.data
y_true = (data.target == ">50K") * 1
A = X_raw[["race", "sex"]]
X_raw = pd.get_dummies(X_raw.drop(labels=['sex', 'race'], axis=1))
from sklearn.model_selection import train_test_split
# Split the data into "train" and "test" sets
(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(
    X_raw, y_true, A, test_size=0.3, random_state=12345, stratify=y_true
)
# Ensure indices are aligned between X, y and A,
# after all the slicing and splitting of DataFrames
# and Series
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)
A_train = A_train.reset_index(drop=True)
A_test = A_test.reset_index(drop=True)
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
classifier = DecisionTreeClassifier(min_samples_leaf=10, max_depth=4)
classifier.fit(X_train, y_train)
y_pred_tr = classifier.predict(X_test)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred_tr))
print('Precision: %.3f' % precision_score(y_test, y_pred_tr))
print('Recall: %.3f' % recall_score(y_test, y_pred_tr))
print('F1 score: %.3f' % f1_score(y_test, y_pred_tr))
print('AUC: %.3f' % roc_auc_score(y_test, y_pred_tr))
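# Before opening the dashboard, a minimal sketch (not part of the original gist) of
# inspecting per-group accuracy directly with pandas; it assumes only the variables
# defined above (A_test, y_test, y_pred_tr) and no particular fairlearn version.
results = pd.DataFrame({'race': A_test.race, 'sex': A_test.sex,
                        'y_true': y_test, 'y_pred': y_pred_tr})
for col in ['race', 'sex']:
    # Accuracy per sensitive-feature group; large gaps hint at the disparities
    # the fairness dashboard will surface below.
    print(results.groupby(col).apply(lambda g: accuracy_score(g['y_true'], g['y_pred'])))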
# https://github.com/Azure/MachineLearningNotebooks/blob/master/contrib/fairness/upload-fairness-dashboard.yml
name: upload-fairness-dashboard
dependencies:
  - pip:
    - azureml-sdk
    - azureml-contrib-fairness
    - fairlearn==0.4.6
    - joblib
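# (Usage note, not part of the original file: this conda environment spec can be
#  installed with `conda env create -f upload-fairness-dashboard.yml` before running
#  the upload code below.)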
# View this model in Fairlearn's fairness dashboard, and see the disparities which appear:
from fairlearn.widget import FairlearnDashboard
FairlearnDashboard(sensitive_features=A_test,
                   sensitive_feature_names=['Race', 'Sex'],
                   y_true=y_test,
                   y_pred={"model": y_pred_tr})
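# Note: fairlearn.widget.FairlearnDashboard was removed in later fairlearn releases.
# On a newer fairlearn, a roughly equivalent sketch (assuming the separate raiwidgets
# package is installed) would be:
# from raiwidgets import FairnessDashboard
# FairnessDashboard(sensitive_features=A_test,
#                   y_true=y_test,
#                   y_pred={"model": y_pred_tr})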
# Gather the information needed to connect to the Azure Machine Learning service
from azureml.core import Workspace, Experiment, Model
import joblib
import os
# Load the settings from a config.json file
# Reference: https://docs.microsoft.com/ko-kr/azure/machine-learning/how-to-configure-environment#workspace
ws = Workspace.from_config()
ws.get_details()
os.makedirs('models', exist_ok=True)
# The model used must be registered (this does not need to be repeated; if the model has already been registered, change this to retrieve the existing model instead)
# Function to register models into Azure Machine Learning
def register_model(name, model):
    print("Registering ", name)
    model_path = "models/{0}.pkl".format(name)
    joblib.dump(value=model, filename=model_path)
    registered_model = Model.register(model_path=model_path,
                                      model_name=name,
                                      workspace=ws)
    print("Registered ", registered_model.id)
    return registered_model.id
# Call the register_model function
dt_classifier_id = register_model("fairness_DecisionTreeClassifier", classifier)
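# If the classifier was already registered in an earlier run, a sketch of fetching the
# existing registration instead of registering again (the name must match the one used above):
# existing_model = Model(ws, name="fairness_DecisionTreeClassifier")
# dt_classifier_id = existing_model.id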
# Precompute the fairness metrics
# Create a dictionary of model(s) you want to assess for fairness
sf = {'Race': A_test.race, 'Sex': A_test.sex}
ys_pred = {dt_classifier_id: y_pred_tr}
from fairlearn.metrics._group_metric_set import _create_group_metric_set
dash_dict = _create_group_metric_set(y_true=y_test,
                                     predictions=ys_pred,
                                     sensitive_features=sf,
                                     prediction_type='binary_classification')
# Upload the precomputed fairness metrics to the Azure Machine Learning service
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id
exp = Experiment(ws, "Test_Fairness_Census_Demo-testset")
print(exp)
run = exp.start_logging()
# Upload the dashboard to Azure Machine Learning
try:
    dashboard_title = "Fairness insights of Decision Tree Classifier"
    # Set validate_model_ids parameter of upload_dashboard_dictionary to False if you have not registered your model(s)
    upload_id = upload_dashboard_dictionary(run,
                                            dash_dict,
                                            dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))
    # To test the dashboard, you can download it back and ensure it contains the right information
    downloaded_dict = download_dashboard_by_upload_id(run, upload_id)
finally:
    run.complete()
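# A quick sanity check (a sketch, not in the original gist): list the top-level keys of
# the dictionary downloaded back from the service to confirm the upload round-tripped.
print(sorted(downloaded_dict.keys()))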
# The dashboard can now be viewed within the Azure Machine Learning service