Skip to content

Instantly share code, notes, and snippets.

@ShivnarenSrinivasan
Last active March 27, 2022 10:23
Show Gist options
  • Save ShivnarenSrinivasan/f37de682ffc5af970858b8efc633ab86 to your computer and use it in GitHub Desktop.
Save ShivnarenSrinivasan/f37de682ffc5af970858b8efc633ab86 to your computer and use it in GitHub Desktop.
Module 3 Credit
from collections.abc import (
Collection,
)
from typing import (
Final,
)
import numpy as np
import pandas as pd
from sklearn import (
base,
preprocessing,
)
from sklearn.decomposition import (
PCA,
)
from sklearn.compose import (
make_column_selector,
make_column_transformer,
ColumnTransformer,
)
from sklearn.preprocessing import (
OrdinalEncoder,
StandardScaler,
OneHotEncoder,
RobustScaler,
MinMaxScaler,
)
from sklearn.pipeline import (
Pipeline,
make_pipeline,
)
from sklearn.metrics import (
accuracy_score,
f1_score,
recall_score,
precision_score,
)
# Column name of the prediction label. load_raw() renames the CSV's
# 'default payment next month' column to this value, and preprocess_inputs()
# uses it as the default target column.
TARGET: Final = 'DEFAULT_NEXT_MONTH'
# ---------------- IO -------------------------
def load_raw() -> pd.DataFrame:
    """Load the raw credit-card CSV from the working directory.

    The file's second row (``header=1``) supplies the column names and the
    ``ID`` column becomes the index. The label column is renamed to TARGET.

    Returns:
        The raw data with the label column named TARGET.
    """
    raw = pd.read_csv(
        'default_of_credit_card_clients.csv',
        header=1,
        index_col='ID',
    )
    renamed = raw.rename(columns={'default payment next month': TARGET})
    return renamed
# ------------------ CLEANING ---------------
def convert_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with SEX, EDUCATION and MARRIAGE recoded.

    SEX is shifted down by one. EDUCATION and MARRIAGE are passed value by
    value through ``_convert_education`` and ``_convert_marriage``, which
    collapse out-of-range codes into a single "others" code.

    Args:
        df: Frame containing the ``SEX``, ``EDUCATION`` and ``MARRIAGE``
            columns.

    Returns:
        A new frame; *df* itself is not modified.
    """
    converters = {
        'EDUCATION': _convert_education,
        'MARRIAGE': _convert_marriage,
    }
    recoded = {name: df[name].apply(fn) for name, fn in converters.items()}
    return df.assign(SEX=df['SEX'] - 1, **recoded)
def _convert_sex(val: int) -> str:
mapping = {2: 'Male', 1: 'Female'}
return mapping[val]
def _convert_education(val: int) -> int:
# mapping = {1: 'Graduate School', 2: 'University', 3: 'High School'}
return val if val in range(1, 4) else 4
# return mapping.get(val, 'Others')
def _convert_marriage(val: int) -> int:
# mapping = {1: 'Married', 2: 'Single'}
return val if val in {1, 2} else 3
# return mapping.get(val, 'Others')
def _convert_pmt_status(val: int) -> int:
mapping = {
-2: 'No Consumption',
-1: 'Pay Due',
0: 'To Be Due',
9: 'Payment delay for 9+ months',
}
def _other(x: int) -> str:
return f'Payment Delay for {x} months'
return mapping.get(val, _other(val))
def _transform_series(ser: pd.Series, mapping: dict[int, str]) -> pd.Series:
return pd.Series.replace(ser, mapping).astype('category')
# ---------------- PREPROCESSING ---------------------------
def split_data(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
    """Split *df* into features and target by delegating to preprocess_inputs.

    Args:
        df: Frame that still contains the target column.

    Returns:
        ``(X, y)`` exactly as produced by ``preprocess_inputs(df)``.
    """
    features, labels = preprocess_inputs(df)
    return features, labels
def _preprocess() -> ColumnTransformer:
    """Build the column transformer for the preprocessing step.

    Numeric-dtype columns are standard-scaled, ``MARRIAGE`` and ``EDUCATION``
    are one-hot encoded (categories unseen during fit are ignored), and any
    remaining columns are passed through unchanged.

    NOTE(review): if MARRIAGE/EDUCATION are stored as numbers they are
    presumably matched by the numeric selector as well as the encoder --
    confirm that this is intended.
    """
    numeric_columns = make_column_selector(dtype_include=np.number)
    scale_numeric = (StandardScaler(), numeric_columns)
    encode_categories = (
        OneHotEncoder(handle_unknown='ignore'),
        ('MARRIAGE', 'EDUCATION'),
    )
    return make_column_transformer(
        scale_numeric,
        encode_categories,
        remainder='passthrough',
    )
# ------------------ MODEL ----------------
def build_model(model: base.ClassifierMixin) -> Pipeline:
    """Wrap *model* in a single-step Pipeline whose step is named ``'model'``.

    Args:
        model: The classifier to wrap.

    Returns:
        A Pipeline containing only *model*.
    """
    steps = [('model', model)]
    return Pipeline(steps)
def build_tree(model: base.ClassifierMixin):
    """Wrap *model* in a pipeline built with ``make_pipeline``.

    Args:
        model: The classifier to wrap.

    Returns:
        The pipeline returned by ``make_pipeline(model)``.
    """
    pipeline = make_pipeline(model)
    return pipeline
# def train_model()
def _estimator_label(model) -> str:
    """Return the text used as *model*'s row label in the results table."""
    named_steps = getattr(model, 'named_steps', None)
    if named_steps is None:
        # Bare estimator rather than a pipeline: label it by its own repr.
        return str(model)
    if 'model' in named_steps:
        # Pipelines from build_model name their estimator step 'model'.
        return str(named_steps['model'])
    # make_pipeline (used by build_tree) auto-names its steps, so there is no
    # 'model' key; fall back to the last step, i.e. the final estimator.
    return str(model.steps[-1][1])


def evaluate_models(
    models: Collection[base.ClassifierMixin],
    X_train: pd.DataFrame,
    y_train: pd.Series,
    X: pd.DataFrame,
    y: pd.Series,
) -> pd.DataFrame | pd.Series:
    """Fit every model on the training data and score it on ``(X, y)``.

    All models are fitted first and only then asked to predict, matching the
    original two-phase order.

    Args:
        models: Pipelines (from ``build_model`` / ``build_tree``) or bare
            classifiers to evaluate.
        X_train: Training features.
        y_train: Training labels.
        X: Evaluation features.
        y: Evaluation labels.

    Returns:
        One row per model, as produced by ``evaluate_model``, concatenated
        in the order of *models*.

    Raises:
        ValueError: If *models* is empty (raised by ``pd.concat``).
    """
    for model in models:
        model.fit(X_train, y_train)

    reports = [
        evaluate_model(model.predict(X), y, _estimator_label(model))
        for model in models
    ]
    return pd.concat(reports)
def evaluate_model(
    pred: np.ndarray, y: pd.Series | np.ndarray, model: str
) -> pd.DataFrame:
    """Score predictions against the true labels.

    Args:
        pred: Predicted labels.
        y: True labels.
        model: Text used as the row label of the result.

    Returns:
        A one-row frame indexed by *model* with the columns ``Accuracy``,
        ``F1``, ``Recall`` and ``Precision``.
    """
    scorers = (
        ('Accuracy', accuracy_score),
        ('F1', f1_score),
        ('Recall', recall_score),
        ('Precision', precision_score),
    )
    scores = {label: [scorer(y, pred)] for label, scorer in scorers}
    return pd.DataFrame(scores, index=[model])
# ------------- PROVIDED FUNCS --------
# write a function for onehot_encode
def onehot_encode(df: pd.DataFrame, column_dict: dict[str, str]) -> pd.DataFrame:
    """Replace each listed column with 0/1 indicator columns.

    For every ``column -> prefix`` pair, one ``{prefix}_{value}`` column is
    appended per distinct value (in order of first appearance) and the
    source column is then dropped.

    Args:
        df: Input frame; it is copied, not modified.
        column_dict: Maps each column to encode to the prefix used for its
            indicator columns.

    Returns:
        A new frame with the indicator columns in place of the originals.
    """
    encoded = df.copy()
    for source_col, prefix in column_dict.items():
        levels = encoded[source_col].unique()
        for level in levels:
            encoded[f'{prefix}_{level}'] = _one_hot(encoded[source_col], level)
        encoded = encoded.drop(columns=source_col)
    return encoded
def _one_hot(ser: pd.Series, val: int) -> np.ndarray:
return np.where(ser == val, 1, 0)
def preprocess_inputs(
    df: pd.DataFrame, target: str = TARGET
) -> tuple[pd.DataFrame, pd.Series]:
    """One-hot encode, split off the target and standard-scale the features.

    Args:
        df: Frame containing ``EDUCATION``, ``MARRIAGE`` and the *target*
            column; it is not modified.
        target: Name of the label column.

    Returns:
        ``(X, y)``: the scaled features and the untouched labels. Both keep
        the index of *df*, so they stay aligned row for row.
    """
    # onehot_encode works on its own copy, so no extra copy is needed here.
    encoded = onehot_encode(df, {'EDUCATION': 'EDU', 'MARRIAGE': 'MAR'})

    # Split into features and label.
    y = encoded[target].copy()
    X = encoded.drop(columns=target)

    # NOTE(review): the scaler is fitted on every row passed in; if this runs
    # before a train/test split, test rows influence the scaling -- confirm.
    scaled = StandardScaler().fit_transform(X)

    # Re-attach the original index: without it the frame gets a fresh
    # RangeIndex and no longer lines up with y, which keeps df's index.
    X_ = pd.DataFrame(scaled, columns=X.columns, index=X.index)
    return X_, y
# ---------------------------------------
def __test_one_hot():
    """Run ``__apply_one_hot_encode`` on a small Sex/Age sample frame.

    Returns:
        The sample frame after encoding its ``Sex`` column.
    """
    # External function
    sample = pd.DataFrame(
        {
            'Sex': ['Male', 'Female', 'Male', 'Female'],
            'Age': [1, 2, 3, 4],
        }
    )
    return __apply_one_hot_encode(sample, sample['Sex'])
def __apply_one_hot_encode(df: pd.DataFrame, column) -> pd.DataFrame:
# External function
header = set(dict(column).values())
dataset = dict()
for head in header:
dataset[head] = []
for value in column:
if head == value:
dataset[head].append(1)
else:
dataset[head].append(0)
df[head] = dataset[head]
return df
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment