Skip to content

Instantly share code, notes, and snippets.

View rkdgusrn1212's full-sized avatar

강현구 (Hyungu Kang) rkdgusrn1212

View GitHub Profile
@rkdgusrn1212
rkdgusrn1212 / partial_dependence_plot.py
Last active May 27, 2022 16:55
Partial Dependence Plot
from matplotlib import pyplot as plt
from pdpbox import pdp
# Names that must already be defined before this snippet runs:
# my_model, val_X, feature_list
# Compute the isolated partial-dependence result for the single feature
# 'feature1', handing pdpbox the model, the dataset and the feature list.
# NOTE(review): newer pdpbox releases appear to have replaced
# pdp_isolate/pdp_plot with a class-based API — confirm the installed version.
pdp_iso = pdp.pdp_isolate(model=my_model, dataset=val_X, model_features=feature_list, feature='feature1')
# Draw the plot from the isolate result, then display the figure.
pdp.pdp_plot(pdp_iso, 'feature1')
plt.show()
@rkdgusrn1212
rkdgusrn1212 / Model.java
Last active May 28, 2022 17:16
A-star model in Java
import java.util.PriorityQueue;
import java.util.ArrayList;
class Model{
private class State implements Comparable<State>{
int value;
int pathCost;
@Override
public int compareTo(State state){
@rkdgusrn1212
rkdgusrn1212 / shap.py
Created May 30, 2022 14:50
SHAP in python
import shap
# Explainer choice: the author lists Tree, Deep or Kernel as the options;
# TreeExplainer is the one used here.
explainer = shap.TreeExplainer(my_model)
# SHAP values for one record (single_record must be defined beforehand).
shap_values = explainer.shap_values(single_record)
shap.initjs()
# Plot using the first entry of expected_value and the first entry of shap_values.
# NOTE(review): which model output index 0 refers to is not visible here —
# presumably one entry per output/class; confirm for the model in use.
shap.force_plot(explainer.expected_value[0], shap_values[0], single_record)
@rkdgusrn1212
rkdgusrn1212 / summary_plog.py
Created May 30, 2022 18:02
SHAP Summary Plot
import shap # package used to calculate Shap values
# Explainer choice: tree, deep or kernel (TreeExplainer is used here).
explainer = shap.TreeExplainer(my_model)
# The summary plot needs SHAP values for every record, so explain all of val_X.
shap_values = explainer.shap_values(val_X)
# Plot entry 1 of shap_values against val_X.
# NOTE(review): the original note calls shap_values[1] the "positive" case in
# "regression"; presumably this means the positive class of a binary
# classifier — confirm against the model being explained.
shap.summary_plot(shap_values[1], val_X)
@rkdgusrn1212
rkdgusrn1212 / drop_columns.py
Last active May 31, 2022 17:08
Missing Values : 1) Drop Columns
import pandas as pd
# Placeholder frame; substitute the DataFrame to be cleaned.
data = pd.DataFrame()
# pandas notes relevant to the expression below:
# - isna() (alias: isnull()) returns a same-shaped object of booleans marking
#   which values are missing.
# - DataFrame.sum() and DataFrame.any() reduce down the rows by default, so the
#   resulting Series holds one entry per column.
# - Operators applied to a Series work element-wise and give back a Series.
# - Indexing a Series with a list or another Series yields a Series of the
#   selected values.
# Collect the label of every column that contains at least one missing value.
missing_val_cols = [
    label
    for label in data.columns
    if data[label].isna().any()
]
@rkdgusrn1212
rkdgusrn1212 / imputation.py
Last active May 31, 2022 11:06
Missing Values : 2) Imputation
import pandas as pd
from sklearn.impute import SimpleImputer
# Placeholder frame; substitute the DataFrame whose missing values should be filled.
data = pd.DataFrame()
# Imputer with scikit-learn's default settings.
imputer = SimpleImputer()
# Wrapping the fit_transform output in a new DataFrame loses both the column
# names and the row index of `data`. Passing them to the constructor restores
# both in one step; previously only the column names were put back, so the
# rows were silently renumbered 0..n-1 and no longer aligned with `data`.
# NOTE(review): if the imputer drops a column (e.g. one that is entirely
# missing), the column count no longer matches and this raises ValueError,
# just as the original column assignment did — confirm for your data.
imputed_data = pd.DataFrame(
    imputer.fit_transform(data),
    columns=data.columns,
    index=data.index,
)
@rkdgusrn1212
rkdgusrn1212 / drop_categorical_col.py
Created May 31, 2022 17:19
Drop Categorical Variables
import pandas as pd
#data : the DataFrame to preprocess (must be defined beforehand)
# Build and return a new DataFrame that leaves out the object-dtype columns.
# (Author's note: the dtypes encountered are object, float64 and int64.)
preproccessed_data = data.select_dtypes(exclude=['object'])
@rkdgusrn1212
rkdgusrn1212 / ordinal_encode_categorical_col.py
Created May 31, 2022 17:50
Ordinal Encode Categorical Column
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
# Categorical columns: those whose dtype in train_data is "object".
object_cols = [col for col in train_data.columns if train_data[col].dtype == "object"]
# Columns whose set of values in valid_data is a subset of the set of values in
# train_data; this is a necessary condition for using an encoder fitted on
# train_data.
good_cols = [col for col in object_cols if set(valid_data[col]).issubset(set(train_data[col]))]
ordinal_encoder = OrdinalEncoder()
@rkdgusrn1212
rkdgusrn1212 / one_hot_encoding.py
Last active June 1, 2022 01:46
One-Hot encoding
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
# Only encode columns that will not be split into too many dimensions
# (here: fewer than 10 distinct values in X_train).
low_cardinality_cols = [col for col in object_cols if X_train[col].nunique() < 10]
# NOTE(review): scikit-learn 1.2 renamed `sparse` to `sparse_output` and later
# releases removed the old name — confirm the installed version accepts `sparse`.
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
# Fit on the training columns, then reuse the fitted encoder for validation.
# NOTE(review): the new frames presumably get a default index rather than the
# index of X_train/X_valid — check that it is restored before joining them back.
OH_cols_train = pd.DataFrame(encoder.fit_transform(X_train[low_cardinality_cols]))
OH_cols_valid = pd.DataFrame(encoder.transform(X_valid[low_cardinality_cols]))
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# Read the training and test CSV files, using the 'Id' column as the row index.
train_data = pd.read_csv('train.csv', index_col='Id')
test_data = pd.read_csv('test.csv', index_col='Id')