Skip to content

Instantly share code, notes, and snippets.

@rkdgusrn1212
Last active June 7, 2022 04:53
Show Gist options
  • Save rkdgusrn1212/917c9fc31739a1d62b46a7da263f4e50 to your computer and use it in GitHub Desktop.
Save rkdgusrn1212/917c9fc31739a1d62b46a7da263f4e50 to your computer and use it in GitHub Desktop.
Mutual Information
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_selection import mutual_info_regression
# Set Matplotlib defaults.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so prefer the new name and
# fall back to the legacy one on older installations. If neither is
# available, the current (default) style is left untouched.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
plt.rc("figure", autolayout=True)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
def make_mi_scores(X, y):
    """Return mutual-information scores between each feature and the target.

    Args:
        X: Feature table (a pandas DataFrame). It is copied, so the caller's
            frame is not modified.
        y: Target values (a pandas Series).

    Returns:
        A pandas Series named "MI Scores", indexed by the column names of
        ``X`` and sorted from the highest score to the lowest.
    """
    encoded = X.copy()

    # Replace every object/category column with integer codes.
    # ``factorize`` returns ``(codes, uniques)``; only the codes are kept.
    categorical_columns = encoded.select_dtypes(["object", "category"]).columns
    for column in categorical_columns:
        codes, _uniques = encoded[column].factorize()
        encoded[column] = codes

    # A feature is treated as discrete exactly when its dtype is an integer
    # dtype (NumPy integers or pandas integer extension types).
    is_discrete = [
        pd.api.types.is_integer_dtype(dtype) for dtype in encoded.dtypes
    ]

    raw_scores = mutual_info_regression(
        encoded,
        y,
        discrete_features=is_discrete,
        random_state=0,
    )

    # Label each score with its column name, then order highest-first.
    labelled = pd.Series(raw_scores, name="MI Scores", index=encoded.columns)
    return labelled.sort_values(ascending=False)
def plot_mi_scores(scores):
    """Draw a horizontal bar chart of mutual-information scores via pyplot.

    Args:
        scores: A pandas Series of scores whose index holds the bar labels.
    """
    # Sort ascending: the first bar is drawn at the bottom, so the largest
    # score ends up at the top of the chart.
    ordered = scores.sort_values(ascending=True)
    labels = list(ordered.index)
    positions = np.arange(len(ordered))

    plt.barh(positions, ordered)   # horizontal bars
    plt.yticks(positions, labels)  # label each bar with its feature name
    plt.title("Mutual Information Scores")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment