Last active
June 7, 2022 04:53
-
-
Save rkdgusrn1212/917c9fc31739a1d62b46a7da263f4e50 to your computer and use it in GitHub Desktop.
Mutual Information
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
from sklearn.feature_selection import mutual_info_regression | |
# Set Matplotlib defaults.
# Matplotlib 3.6 renamed the bundled seaborn style sheets with a
# "seaborn-v0_8-" prefix and later releases dropped the old names, so use
# whichever spelling this installation actually registers.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither name is registered: make the original call so the failure stays
    # visible instead of silently skipping the style.
    plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
def make_mi_scores(X, y):
    """Return mutual-information scores between each feature and the target.

    Args:
        X: Feature table (a pandas DataFrame). It is copied, so the caller's
            frame is not modified.
        y: Target values (a pandas Series).

    Returns:
        A pandas Series named "MI Scores", indexed by the column names of X
        and sorted from highest to lowest score.
    """
    encoded = X.copy()

    # Replace every object/category column with integer codes. factorize()
    # returns (codes, uniques); only the codes are needed here. The type of
    # `uniques` depends on the column: Categorical for categorical data,
    # Index for other pandas objects, ndarray otherwise.
    for column in encoded.select_dtypes(["object", "category"]):
        codes, _uniques = encoded[column].factorize()
        encoded[column] = codes

    # A column is treated as discrete when its dtype is a NumPy or pandas
    # (extension) integer type; is_integer_dtype is True only for those.
    is_discrete = [
        pd.api.types.is_integer_dtype(dtype) for dtype in encoded.dtypes
    ]

    # One mutual-information estimate per feature column against the target.
    raw_scores = mutual_info_regression(
        encoded,
        y,
        discrete_features=is_discrete,
        random_state=0,
    )

    # Label each score with its column name and order from best to worst.
    return pd.Series(
        raw_scores, name="MI Scores", index=encoded.columns
    ).sort_values(ascending=False)
def plot_mi_scores(scores):
    """Draw a horizontal bar chart of mutual-information scores.

    Args:
        scores: pandas Series of scores whose index holds the labels to show
            on the y axis.
    """
    # barh places the first element at the bottom, so sort ascending to put
    # the largest score at the top of the chart.
    ordered = scores.sort_values(ascending=True)
    positions = np.arange(len(ordered))

    plt.barh(positions, ordered)  # one horizontal bar per score
    plt.yticks(positions, list(ordered.index))  # label each bar with its index entry
    plt.title("Mutual Information Scores")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment