202101-blog-article-fairlearn
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
# Load the census dataset
data = fetch_openml(data_id=1590, as_frame=True)
# Separate out sensitive features such as sex and race and exclude them from model training
X_raw = data.data
y_true = (data.target == ">50K") * 1
A = X_raw[["race", "sex"]]
X_raw = pd.get_dummies(X_raw.drop(labels=['sex', 'race'], axis=1))
from sklearn.model_selection import train_test_split
# Split the data into "train" and "test" sets
(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(
    X_raw, y_true, A, test_size=0.3, random_state=12345, stratify=y_true
)
# Ensure indices are aligned between X, y and A,
# after all the slicing and splitting of DataFrames
# and Series
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)
A_train = A_train.reset_index(drop=True)
A_test = A_test.reset_index(drop=True)
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
classifier = DecisionTreeClassifier(min_samples_leaf=10, max_depth=4)
classifier.fit(X_train, y_train)
y_pred_tr = classifier.predict(X_test)
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred_tr))
print('Precision: %.3f' % precision_score(y_test, y_pred_tr))
print('Recall: %.3f' % recall_score(y_test, y_pred_tr))
print('F1 score: %.3f' % f1_score(y_test, y_pred_tr))
print('AUC: %.3f' % roc_auc_score(y_test, y_pred_tr))
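# Before opening the dashboard, a minimal sketch (not part of the original gist) of
# inspecting per-group accuracy directly with pandas; it assumes only the variables
# defined above (A_test, y_test, y_pred_tr) and no particular fairlearn version.
results = pd.DataFrame({'race': A_test.race, 'sex': A_test.sex,
                        'y_true': y_test, 'y_pred': y_pred_tr})
for col in ['race', 'sex']:
    # Accuracy per sensitive-feature group; large gaps hint at the disparities
    # the fairness dashboard will surface below.
    print(results.groupby(col).apply(lambda g: accuracy_score(g['y_true'], g['y_pred'])))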
# https://github.com/Azure/MachineLearningNotebooks/blob/master/contrib/fairness/upload-fairness-dashboard.yml
name: upload-fairness-dashboard
dependencies:
  - pip:
    - azureml-sdk
    - azureml-contrib-fairness
    - fairlearn==0.4.6
    - joblib
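# (Usage note, not part of the original file: this conda environment spec can be
#  installed with `conda env create -f upload-fairness-dashboard.yml` before running
#  the upload code below.)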
# View this model in Fairlearn's fairness dashboard, and see the disparities which appear:
from fairlearn.widget import FairlearnDashboard
FairlearnDashboard(sensitive_features=A_test,
                   sensitive_feature_names=['Race', 'Sex'],
                   y_true=y_test,
                   y_pred={"model": y_pred_tr})
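# Note: fairlearn.widget.FairlearnDashboard was removed in later fairlearn releases.
# On a newer fairlearn, a roughly equivalent sketch (assuming the separate raiwidgets
# package is installed) would be:
# from raiwidgets import FairnessDashboard
# FairnessDashboard(sensitive_features=A_test,
#                   y_true=y_test,
#                   y_pred={"model": y_pred_tr})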
# Gather the information needed to connect to the Azure Machine Learning service
from azureml.core import Workspace, Experiment, Model
import joblib
import os
# Load the settings from a config.json file
# Reference: https://docs.microsoft.com/ko-kr/azure/machine-learning/how-to-configure-environment#workspace
ws = Workspace.from_config()
ws.get_details()
os.makedirs('models', exist_ok=True)
# The model used must be registered (this does not need to be repeated; if the model has already been registered, change this to retrieve the existing model instead)
# Function to register models into Azure Machine Learning
def register_model(name, model):
    print("Registering ", name)
    model_path = "models/{0}.pkl".format(name)
    joblib.dump(value=model, filename=model_path)
    registered_model = Model.register(model_path=model_path,
                                      model_name=name,
                                      workspace=ws)
    print("Registered ", registered_model.id)
    return registered_model.id
# Call the register_model function
dt_classifier_id = register_model("fairness_DecisionTreeClassifier", classifier)
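# If the classifier was already registered in an earlier run, a sketch of fetching the
# existing registration instead of registering again (the name must match the one used above):
# existing_model = Model(ws, name="fairness_DecisionTreeClassifier")
# dt_classifier_id = existing_model.id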
# Precompute the fairness metrics
# Create a dictionary of model(s) you want to assess for fairness
sf = {'Race': A_test.race, 'Sex': A_test.sex}
ys_pred = {dt_classifier_id: y_pred_tr}
from fairlearn.metrics._group_metric_set import _create_group_metric_set
dash_dict = _create_group_metric_set(y_true=y_test,
                                     predictions=ys_pred,
                                     sensitive_features=sf,
                                     prediction_type='binary_classification')
# Upload the precomputed fairness metrics to the Azure Machine Learning service
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id
exp = Experiment(ws, "Test_Fairness_Census_Demo-testset")
print(exp)
run = exp.start_logging()
# Upload the dashboard to Azure Machine Learning
try:
    dashboard_title = "Fairness insights of Decision Tree Classifier"
    # Set validate_model_ids parameter of upload_dashboard_dictionary to False if you have not registered your model(s)
    upload_id = upload_dashboard_dictionary(run,
                                            dash_dict,
                                            dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))
    # To test the dashboard, you can download it back and ensure it contains the right information
    downloaded_dict = download_dashboard_by_upload_id(run, upload_id)
finally:
    run.complete()
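# A quick sanity check (a sketch, not in the original gist): list the top-level keys of
# the dictionary downloaded back from the service to confirm the upload round-tripped.
print(sorted(downloaded_dict.keys()))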
# The dashboard can now be viewed within the Azure Machine Learning service