This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xgboost as xgb

# Gradient-boosted regressor with 100 boosting rounds.
xgb_model = xgb.XGBRegressor(n_estimators=100)

# cross_validate, X_train and y_train are defined outside this snippet.
# NOTE(review): the positional 10 is presumably the number of CV folds;
# confirm against the cross_validate definition.
print("MAE Score: ", cross_validate(xgb_model, 10, X_train, y_train))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lightgbm as lgb

# LightGBM regressor with 100 boosting rounds.
lgb_model = lgb.LGBMRegressor(n_estimators=100)

# cross_validate, X_train and y_train are defined outside this snippet.
# NOTE(review): the positional 10 is presumably the number of CV folds;
# confirm against the cross_validate definition.
print("MAE Score: ", cross_validate(lgb_model, 10, X_train, y_train))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catboost import CatBoostRegressor

# CatBoost regressor: 200 iterations, learning rate 0.1, tree depth 4.
cat_model = CatBoostRegressor(iterations=200, learning_rate=0.1, depth=4)

# cross_validate, X_train and y_train are defined outside this snippet.
# NOTE(review): the positional 10 is presumably the number of CV folds;
# confirm against the cross_validate definition.
print("MAE Score: ", cross_validate(cat_model, 10, X_train, y_train))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the raw data (the original literal was missing its closing quote).
data = pd.read_csv('some_csv_file.csv')

# Percentage of missing values in each column.
missing_percent = (data.isna().sum() / data.shape[0]) * 100

# Names of the columns in which at least 80% of the values are missing.
cols_2_drop = missing_percent[missing_percent >= 80].index

# Drop those mostly-empty columns; rows are left untouched.
df = data.drop(cols_2_drop, axis=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the raw data (the original literal was missing its closing quote).
data = pd.read_csv('some_csv_file.csv')

# Same column clean-up delegated to datasist, with an 80 percent threshold.
# NOTE(review): the other snippets reach datasist helpers through a submodule
# (ds.structdata.<name>); confirm that drop_missing is exposed directly on the
# top-level package before relying on this call.
df = ds.drop_missing(data=data, percent=80)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import datasist as ds  # datasist helper library
import numpy as np

# Load the train and test sets from CSV files in the working directory.
train_df = pd.read_csv('train_data.csv')
test_df = pd.read_csv('test_data.csv')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run datasist's train/test comparison, passing 'Customer Id' as the index
# column and 'Building Dimension' as the column to check.
ds.structdata.check_train_test_set(
    train_df,
    test_df,
    index='Customer Id',
    col='Building Dimension',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stack train and test into one frame so transformations can be applied once;
# ntrain and ntest are used below as the original row counts.
all_data, ntrain, ntest = ds.structdata.join_train_and_test(train_df, test_df)
print(f"New size of combined data {all_data.shape}")
print(f"Old size of train data: {ntrain}")
print(f"Old size of test data: {ntest}")

# Later, after the transformations, split the combined frame back by position:
# the first ntrain rows are the train set, the remainder the test set.
# .copy() gives independent frames so later in-place edits do not act on a
# slice of all_data (avoids pandas' SettingWithCopyWarning).
train = all_data.iloc[:ntrain].copy()
test = all_data.iloc[ntrain:].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stack train and test into one frame so transformations can be applied once;
# ntrain and ntest are used below as the original row counts.
all_data, ntrain, ntest = ds.structdata.join_train_and_test(train_df, test_df)
print(f"New size of combined data {all_data.shape}")
print(f"Old size of train data: {ntrain}")
print(f"Old size of test data: {ntest}")

# Later, after the transformations, split the combined frame back by position:
# the first ntrain rows are the train set, the remainder the test set.
# .copy() gives independent frames so later in-place edits do not act on a
# slice of all_data (avoids pandas' SettingWithCopyWarning).
train = all_data.iloc[:ntrain].copy()
test = all_data.iloc[ntrain:].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import category_encoders as ce

# Pull the target out of the training frame, then remove it from the features.
# Rebinding (instead of inplace=True) avoids mutating what may be a slice of
# another DataFrame.
target = train['Claim'].values
train = train.drop(columns='Claim')

# Ordinal-encode the 'Geo_Code' column; the encoder is fitted on train only.
enc = ce.OrdinalEncoder(cols=['Geo_Code'])
enc.fit(train)
train_enc = enc.transform(train)

# test may still carry a 'Claim' column if it was cut from the same combined
# frame as train; drop it for the transform so the columns match what the
# encoder was fitted on. errors='ignore' keeps this a no-op when it is absent.
test_enc = enc.transform(test.drop(columns='Claim', errors='ignore'))