Last active
January 23, 2021 16:53
-
-
Save ianychoi/3850d7c34c76aa2e6219db698ed57241 to your computer and use it in GitHub Desktop.
202101-blog-article-fairlearn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pin fairlearn to the release named in the walkthrough's environment file;
# the code below imports fairlearn.widget and a private fairlearn.metrics module.
pip install fairlearn==0.4.6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml

# Load the census dataset (OpenML data_id=1590) as pandas objects.
data = fetch_openml(data_id=1590, as_frame=True)
X_raw = data.data

# Keep the sensitive features (race, sex) aside in A ...
A = X_raw[["race", "sex"]]

# Binary label: 1 where the target equals ">50K", 0 otherwise.
y_true = (data.target == ">50K") * 1

# ... and exclude them from the features used for model training,
# one-hot encoding whatever remains.
X_raw = pd.get_dummies(X_raw.drop(columns=["race", "sex"]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Split the data into "train" and "test" sets (30% test, stratified on the
# label), then reset every piece to a fresh 0..n-1 index so that X, y and A
# stay aligned after all the slicing and splitting of DataFrames and Series.
X_train, X_test, y_train, y_test, A_train, A_test = (
    part.reset_index(drop=True)
    for part in train_test_split(
        X_raw, y_true, A, test_size=0.3, random_state=12345, stratify=y_true
    )
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Fit a shallow decision tree on the training split.
classifier = DecisionTreeClassifier(min_samples_leaf=10, max_depth=4)
classifier.fit(X_train, y_train)

# Hard 0/1 predictions on the TEST split (the name is kept as-is because
# other snippets of this walkthrough refer to y_pred_tr).
y_pred_tr = classifier.predict(X_test)

# ROC AUC is a ranking metric and needs scores, not thresholded labels:
# use the predicted probability of the positive class (column 1, label 1).
y_score_test = classifier.predict_proba(X_test)[:, 1]

print('Accuracy: %.3f' % accuracy_score(y_test, y_pred_tr))
print('Precision: %.3f' % precision_score(y_test, y_pred_tr))
print('Recall: %.3f' % recall_score(y_test, y_pred_tr))
print('F1 score: %.3f' % f1_score(y_test, y_pred_tr))
print('AUC: %.3f' % roc_auc_score(y_test, y_score_test))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://github.com/Azure/MachineLearningNotebooks/blob/master/contrib/fairness/upload-fairness-dashboard.yml
# Environment for uploading a fairness dashboard; all packages are installed through pip.
name: upload-fairness-dashboard
dependencies:
- pip:
  - azureml-sdk
  - azureml-contrib-fairness
  - fairlearn==0.4.6
  - joblib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# View this model in Fairlearn's fairness dashboard, and see the disparities
# which appear. The display names are listed in the same order as the
# columns of A_test.
from fairlearn.widget import FairlearnDashboard

FairlearnDashboard(
    sensitive_features=A_test,
    sensitive_feature_names=['Race', 'Sex'],
    y_true=y_test,
    y_pred={"model": y_pred_tr},
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gather what is needed to connect to the Azure Machine Learning service.
import os

import joblib
from azureml.core import Experiment, Model, Workspace

# Load the workspace settings from a config.json file.
# Reference: https://docs.microsoft.com/ko-kr/azure/machine-learning/how-to-configure-environment#workspace
ws = Workspace.from_config()
ws.get_details()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The model in use has to be registered. This does not need to be done more
# than once; if the model is already registered, change this to fetch the
# existing model instead.
def register_model(name, model):
    """Serialize *model* with joblib and register it in the workspace ``ws``.

    Args:
        name: Name to register the model under; also used to build the
            pickle path ``models/<name>.pkl``.
        model: Object to serialize with ``joblib.dump``.

    Returns:
        The ``id`` attribute of the object returned by ``Model.register``.
    """
    print("Registering ", name)
    model_path = "models/{0}.pkl".format(name)
    # Create the target directory here so the function does not depend on
    # a directory having been prepared by the caller beforehand.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(value=model, filename=model_path)
    registered_model = Model.register(model_path=model_path,
                                      model_name=name,
                                      workspace=ws)
    print("Registered ", registered_model.id)
    return registered_model.id


# Call the register_model function
dt_classifier_id = register_model("fairness_DecisionTreeClassifier", classifier)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pre-compute the fairness metrics.
from fairlearn.metrics._group_metric_set import _create_group_metric_set

# Sensitive feature columns of the test split, keyed by display name.
sf = dict(Race=A_test["race"], Sex=A_test["sex"])

# Dictionary of the model(s) to assess for fairness:
# registered model id -> that model's test-set predictions.
ys_pred = {dt_classifier_id: y_pred_tr}

dash_dict = _create_group_metric_set(
    y_true=y_test,
    predictions=ys_pred,
    sensitive_features=sf,
    prediction_type='binary_classification',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Upload the pre-computed fairness metrics to the Azure Machine Learning service.
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id

exp = Experiment(ws, "Test_Fairness_Census_Demo-testset")
print(exp)

run = exp.start_logging()
dashboard_title = "Fairness insights of Decision Tree Classifier"

# Upload the dashboard to Azure Machine Learning
try:
    # Set validate_model_ids parameter of upload_dashboard_dictionary to False if you have not registered your model(s)
    upload_id = upload_dashboard_dictionary(run,
                                            dash_dict,
                                            dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))

    # To test the dashboard, you can download it back and ensure it contains the right information
    downloaded_dict = download_dashboard_by_upload_id(run, upload_id)
except BaseException as exc:
    # Do not report a run whose upload/download raised as "Completed":
    # mark it failed, then let the original error propagate.
    run.fail(error_details=exc)
    raise
else:
    run.complete()

# The result can now be inspected inside the Azure Machine Learning service.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment