Rising Odegua (risenW)
# XGBoost
import xgboost as xgb
xgb_model = xgb.XGBRegressor(n_estimators=100)
print("MAE Score: ", cross_validate(xgb_model, 10, X_train, y_train))

# LightGBM
import lightgbm as lgb
lgb_model = lgb.LGBMRegressor(n_estimators=100)
print("MAE Score: ", cross_validate(lgb_model, 10, X_train, y_train))

# CatBoost
from catboost import CatBoostRegressor
cat_model = CatBoostRegressor(iterations=200, learning_rate=0.1, depth=4)
print("MAE Score: ", cross_validate(cat_model, 10, X_train, y_train))
@risenW
risenW / ds1.py
Last active December 2, 2019 17:00
import pandas as pd

data = pd.read_csv('some_csv_file.csv')

# Percentage of missing values per column
missing_percent = (data.isna().sum() / data.shape[0]) * 100

# Drop columns that are at least 80% missing
cols_2_drop = missing_percent[missing_percent.values >= 80].index
df = data.drop(cols_2_drop, axis=1)
@risenW
risenW / ds2.py
Last active December 2, 2019 17:00
import pandas as pd
import datasist as ds

data = pd.read_csv('some_csv_file.csv')

# datasist one-liner equivalent of ds1.py: drop columns that are at least 80% missing
df = ds.drop_missing(data=data, percent=80)
@risenW
risenW / ds3.py
Last active December 2, 2019 13:57
import pandas as pd
import datasist as ds #import datasist library
import numpy as np
train_df = pd.read_csv('train_data.csv')
test_df = pd.read_csv('test_data.csv')
ds.structdata.check_train_test_set(train_df, test_df, index='Customer Id', col='Building Dimension')
all_data, ntrain, ntest = ds.structdata.join_train_and_test(train_df, test_df)
print("New size of combined data {}".format(all_data.shape))
print("Old size of train data: {}".format(ntrain))
print("Old size of test data: {}".format(ntest))
#later splitting after transformations
train = all_data[:ntrain]
test = all_data[ntrain:]
import category_encoders as ce

# Separate the target and drop it from the training features
target = train['Claim'].values
train.drop(columns='Claim', inplace=True)

# Ordinal-encode the Geo_Code column: fit on train, then transform both sets
enc = ce.OrdinalEncoder(cols=['Geo_Code'])
enc.fit(train)
train_enc = enc.transform(train)
test_enc = enc.transform(test)
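Not part of the original gists: a hedged sketch of how the encoded frames could be passed to one of the models defined earlier. The LGBMRegressor settings are reused from the gist above; treating the Claim values in target as a regression target is an assumption.

# Hypothetical follow-up, reusing names from the snippets above
import lightgbm as lgb

model = lgb.LGBMRegressor(n_estimators=100)
model.fit(train_enc, target)            # target holds the 'Claim' values
predictions = model.predict(test_enc)   # predictions for the test set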