Last active
August 18, 2022 13:54
-
-
Save erdogant/24718ed67d0faedb83c32b76c875fae0 to your computer and use it in GitHub Desktop.
hgboost
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Example: hyperparameter-optimized gradient boosting (hgboost) on the Titanic dataset.

Loads the bundled Titanic data, one-hot encodes the features, and searches
the XGBoost hyperparameter space using AUC as the evaluation metric.
NOTE(review): requires the third-party `hgboost` package; `import_example()`
may download the dataset on first use.
"""

# Import library
from hgboost import hgboost

# Initialize with default settings.
hgb = hgboost()
# hgb = hgboost(max_eval=250, threshold=0.5, cv=5, test_size=0.2, val_size=0.2, top_cv_evals=10, random_state=1, verbose=3)

# Load example Titanic Dataset
df = hgb.import_example()

# Prepare data for classification by cleaning and onehot encoding.
# Drop identifier-like columns that carry no predictive signal.
del df['PassengerId']
del df['Name']
# Split off the target label before encoding the remaining features.
y = df['Survived'].values
del df['Survived']
X = hgb.preprocessing(df, verbose=0)
print(X)
#      Pclass_1.0  Pclass_2.0  Pclass_3.0  ...  Embarked_None  Embarked_Q  Embarked_S
# 0         False       False        True  ...          False       False        True
# 1          True       False       False  ...          False       False       False
# 2         False       False        True  ...          False       False        True
# 3          True       False       False  ...          False       False        True
# 4         False       False        True  ...          False       False        True
# ..          ...         ...         ...  ...            ...         ...         ...
# 886       False        True       False  ...          False       False        True
# 887        True       False       False  ...          False       False        True
# 888       False       False        True  ...          False       False        True
# 889        True       False       False  ...          False       False       False
# 890       False       False        True  ...          False        True       False
# [891 rows x 203 columns]
print(y)
# [0 1 1 1 0 0 0 ... 0 0 0 0 1 0 1 0]

# Fit best model using the AUC as evaluation metric:
results = hgb.xgboost(X, y, pos_label=1, eval_metric='auc')
# results = hgb.catboost(X, y, pos_label=1, eval_metric='auc')
# results = hgb.lightboost(X, y, pos_label=1, eval_metric='auc')

# Expected console output (was missing its leading '#' on the first line,
# which made the original snippet a SyntaxError):
# [hgboost] >Start hgboost classification.
# [hgboost] >Collecting xgb_clf parameters.
# [hgboost] >Correct for unbalanced classes using [scale_pos_weight]..
# [hgboost] >[13] hyperparameters in gridsearch space. Used loss function: [auc].
# [hgboost] >method: xgb_clf
# [hgboost] >eval_metric: auc
# [hgboost] >greater_is_better: True
# [hgboost] >Total dataset: (891, 203)
# [hgboost] >Validation set: (179, 203)
# [hgboost] >Train-set: (491, 203)
# [hgboost] >Test-set: (221, 203)
# [hgboost] >Searching across hyperparameter space for best performing parameters using maximum nr. evaluations: 250
# 100%|██████████| 250/250 [02:38<00:00,  1.58trial/s, best loss: -0.8570934256055364]
# [hgboost]> Collecting the hyper-parameters from the [250] trials.
# [hgboost] >Best performing [xgb_clf] model: auc=0.857093
# [hgboost] >5-fold cross validation for the top 10 scoring models, Total nr. tests: 50
# 100%|██████████| 10/10 [00:18<00:00,  1.82s/it]
# [hgboost] >Evaluate best [xgb_clf] model on validation dataset (179 samples, 20%)
# [hgboost] >[auc]: -0.8557 using HyperOptimized parameters on validation set.
# [hgboost] >[auc]: -0.8401 using default (not optimized) parameters on validation set.
# [hgboost] >Retrain [xgb_clf] on the entire dataset with the optimal parameters settings.
# [hgboost] >Fin!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment