Skip to content

Instantly share code, notes, and snippets.

View rkdgusrn1212's full-sized avatar

강현구 (Hyungu Kang) rkdgusrn1212

View GitHub Profile
@rkdgusrn1212
rkdgusrn1212 / drop_categorical_col.py
Created May 31, 2022 17:19
Drop Categorical Variables
import pandas as pd
# data: the DataFrame to preprocess (must already be defined before this line).
# select_dtypes builds and returns a DataFrame without the object-dtype
# columns. The original note lists object, float64 and int64 as the dtypes
# that occur in the data.
preproccessed_data = data.select_dtypes(exclude='object')
@rkdgusrn1212
rkdgusrn1212 / imputation.py
Last active May 31, 2022 11:06
Missing Values : 2) Imputation
import pandas as pd
from sklearn.impute import SimpleImputer
# Empty DataFrame used as a stand-in in this snippet -- presumably replaced by
# the real data to impute.
data = pd.DataFrame()
imputer = SimpleImputer()
# Wrapping the imputer output in a fresh DataFrame discards the labels: the
# column names become positional integers and the row index is reset as well.
# Restore both from `data` so the imputed frame stays aligned with the original
# rows (this matters when `data` is a subset with a non-default index, e.g.
# after a train/validation split).
imputed_data = pd.DataFrame(
    imputer.fit_transform(data),
    columns=data.columns,
    index=data.index,
)
@rkdgusrn1212
rkdgusrn1212 / drop_columns.py
Last active May 31, 2022 17:08
Missing Values : 1) Drop Columns
import pandas as pd
data = pd.DataFrame()
# Notes on the pandas calls involved:
# - data.isnull() (alias: data.isna()) returns a DataFrame of booleans telling,
#   for every value of data, whether it is null.
# - data.sum() returns a Series of totals (computed per column by default).
# - data.any() returns a Series telling whether at least one value is True
#   (computed per column by default).
# - Applying an operator to a Series works element-wise and yields a Series.
# - Indexing a Series with a Series or a list returns a Series holding the
#   selected values.
#
# Collect the name of every column that contains at least one missing value.
missing_val_cols = [
    name
    for name in data.columns
    if data[name].isna().any()
]
@rkdgusrn1212
rkdgusrn1212 / summary_plog.py
Created May 30, 2022 18:02
SHAP Summary Plot
import shap # package used to calculate Shap values
# Choose the explainer that fits the model: Tree, Deep or Kernel.
explainer = shap.TreeExplainer(my_model)
# The summary plot needs the SHAP values of every record, so the whole
# validation set is passed in.
shap_values = explainer.shap_values(val_X)
# Index 1 picks the second array returned by shap_values. The original note
# describes it as the "positive" output -- presumably the positive class of a
# binary classifier; TODO confirm against the model in use.
selected_shap_values = shap_values[1]
shap.summary_plot(selected_shap_values, val_X)
@rkdgusrn1212
rkdgusrn1212 / shap.py
Created May 30, 2022 14:50
SHAP in python
import shap
# Choose the explainer that fits the model: Tree, Deep or Kernel.
explainer = shap.TreeExplainer(my_model)
# SHAP values for the single record that is to be explained.
shap_values = explainer.shap_values(single_record)
# Load the JavaScript needed to render the force plot.
shap.initjs()
# Use the first expected value and the first SHAP array for the plot.
base_value = explainer.expected_value[0]
record_shap_values = shap_values[0]
shap.force_plot(base_value, record_shap_values, single_record)
@rkdgusrn1212
rkdgusrn1212 / Model.java
Last active May 28, 2022 17:16
A-star model in java
import java.util.PriorityQueue;
import java.util.ArrayList;
class Model{
private class State implements Comparable<State>{
int value;
int pathCost;
@Override
public int compareTo(State state){
@rkdgusrn1212
rkdgusrn1212 / partial_dependence_plot.py
Last active May 27, 2022 16:55
Partial Dependence Plot
from matplotlib import pyplot as plt
from pdpbox import pdp
# Expects my_model, val_X and feature_list to be defined beforehand.
# Feature whose partial dependence is computed and plotted.
target_feature = 'feature1'
# Compute the isolated partial dependence for that single feature.
pdp_iso = pdp.pdp_isolate(
    model=my_model,
    dataset=val_X,
    model_features=feature_list,
    feature=target_feature,
)
pdp.pdp_plot(pdp_iso, target_feature)
plt.show()
@rkdgusrn1212
rkdgusrn1212 / permutation_importance.py
Created May 25, 2022 21:08
Permutation Importance
import eli5
from eli5.sklearn import PermutationImportance
# `model` is a model whose training has already finished.
perm = PermutationImportance(model)
perm = perm.fit(val_X, val_y)
# Display the weight of each feature, labelled with the validation columns.
feature_names = val_X.columns.tolist()
eli5.show_weights(perm, feature_names=feature_names)
@rkdgusrn1212
rkdgusrn1212 / image_geometric_transformation.py
Last active May 23, 2019 03:58
Image Geometric Transformation in Python (without PIL.Image.resize() or rotate())
from PIL import Image
import math
# Ask the user for the path of the image to transform (the prompt reads
# "enter the path of the image to convert").
img_path = input("변환시킬 이미지의 경로를 입력하세요 : ")
img = Image.open(img_path)
# Display the image before the transformation (the title reads "image before
# conversion").
img.show("변환 전 이미지")
# Size of the original image.
width, height = img.size
# Horizontal scaling ratio entered by the user, parsed as a float.
width_rate = float(input("가로방향 변환 비율 : "))
@rkdgusrn1212
rkdgusrn1212 / image_scaling.py
Created May 22, 2019 10:58
Image Scaling using Pillow in Python
from PIL import Image
import numpy as np
# Ask the user for the path of the image to transform (the prompt reads
# "enter the path of the image to convert").
img_path = input("변환시킬 이미지의 경로를 입력하세요 : ")
img = Image.open(img_path)
# Display the image before the transformation (the title reads "image before
# conversion").
img.show("변환 전 이미지")
# Rows, columns and channel count of the image.
# NOTE(review): unpacking exactly three values raises ValueError whenever
# np.shape(img) does not have three entries -- presumably the case for
# single-channel images; confirm.
[height, width, band] = np.shape(img)
# Scaling ratio entered by the user, parsed as a float.
# NOTE(review): the prompt says "새로방향"; given the variable name, "세로방향"
# (vertical direction) was probably intended -- confirm before changing the
# user-facing text.
height_rate = float(input("새로방향 변환 비율 : "))