
@erdogant
Last active August 18, 2022 13:54
hgboost
# Import library
from hgboost import hgboost
# Initialize with default settings.
hgb = hgboost()
# hgb = hgboost(max_eval=250, threshold=0.5, cv=5, test_size=0.2, val_size=0.2, top_cv_evals=10, random_state=1, verbose=3)
# Load the example Titanic dataset
df = hgb.import_example()
# Prepare the data for classification by cleaning and one-hot encoding.
del df['PassengerId']
del df['Name']
y = df['Survived'].values
del df['Survived']
X = hgb.preprocessing(df, verbose=0)
print(X)
# Pclass_1.0 Pclass_2.0 Pclass_3.0 ... Embarked_None Embarked_Q Embarked_S
# 0 False False True ... False False True
# 1 True False False ... False False False
# 2 False False True ... False False True
# 3 True False False ... False False True
# 4 False False True ... False False True
# .. ... ... ... ... ... ... ...
# 886 False True False ... False False True
# 887 True False False ... False False True
# 888 False False True ... False False True
# 889 True False False ... False False False
# 890 False False True ... False True False
# [891 rows x 203 columns]
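The preprocessing step expands each categorical column into one boolean column per category value, which is why 891 rows end up with 203 columns. As a rough illustration of what one-hot encoding does (a minimal pure-Python sketch with made-up data, not hgboost's actual implementation):

```python
# One-hot encoding sketch: each distinct value in a column becomes
# its own boolean column, exactly one of which is True per row.
def one_hot(rows, column):
    categories = sorted({row[column] for row in rows})
    return [{f"{column}_{c}": row[column] == c for c in categories}
            for row in rows]

rows = [{"Embarked": "S"}, {"Embarked": "Q"}, {"Embarked": "S"}]
for encoded in one_hot(rows, "Embarked"):
    print(encoded)
```

This mirrors the `Embarked_Q` / `Embarked_S` style columns shown in the printed DataFrame above.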
print(y)
# [0 1 1 1 0 0 0 ... 0 0 0 0 1 0 1 0]
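The fit below optimizes AUC, the area under the ROC curve. For intuition, AUC equals the probability that a randomly chosen positive sample is scored above a randomly chosen negative one (ties counted as half). A minimal pure-Python sketch of that definition, for illustration only:

```python
def auc(y_true, y_score):
    # Probability that a random positive outranks a random negative,
    # counting tied scores as half a win.
    pos = [s for t, s in zip(y_true, y_score) if t == 1]
    neg = [s for t, s in zip(y_true, y_score) if t == 0]
    wins = sum((p > n) + 0.5 * (p == n) for p in pos for n in neg)
    return wins / (len(pos) * len(neg))

print(auc([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8]))  # 0.75
```

A perfect ranking gives 1.0 and random scoring gives 0.5, which is why the ~0.85 values in the log below indicate a reasonably strong classifier.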
# Fit the best model using AUC as the evaluation metric:
results = hgb.xgboost(X, y, pos_label=1, eval_metric='auc')
# results = hgb.catboost(X, y, pos_label=1, eval_metric='auc')
# results = hgb.lightboost(X, y, pos_label=1, eval_metric='auc')
# [hgboost] >Start hgboost classification.
# [hgboost] >Collecting xgb_clf parameters.
# [hgboost] >Correct for unbalanced classes using [scale_pos_weight]..
# [hgboost] >[13] hyperparameters in gridsearch space. Used loss function: [auc].
# [hgboost] >method: xgb_clf
# [hgboost] >eval_metric: auc
# [hgboost] >greater_is_better: True
# [hgboost] >Total dataset: (891, 203)
# [hgboost] >Validation set: (179, 203)
# [hgboost] >Train-set: (491, 203)
# [hgboost] >Test-set: (221, 203)
# [hgboost] >Searching across hyperparameter space for best performing parameters using maximum nr. evaluations: 250
# 100%|██████████| 250/250 [02:38<00:00, 1.58trial/s, best loss: -0.8570934256055364]
# [hgboost]> Collecting the hyper-parameters from the [250] trials.
# [hgboost] >Best performing [xgb_clf] model: auc=0.857093
# [hgboost] >5-fold cross validation for the top 10 scoring models, Total nr. tests: 50
# 100%|██████████| 10/10 [00:18<00:00, 1.82s/it]
# [hgboost] >Evaluate best [xgb_clf] model on validation dataset (179 samples, 20%)
# [hgboost] >[auc]: -0.8557 using HyperOptimized parameters on validation set.
# [hgboost] >[auc]: -0.8401 using default (not optimized) parameters on validation set.
# [hgboost] >Retrain [xgb_clf] on the entire dataset with the optimal parameters settings.
# [hgboost] >Fin!
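The log above shows the overall procedure: evaluate 250 hyperparameter candidates, keep the best by AUC, cross-validate the top 10, then validate on held-out data. The sampling itself is done by hyperopt's Bayesian optimizer inside hgboost; as a much simpler sketch of the same search loop, here is a random search over a toy scoring surface (the parameter ranges and `score` function are made up for illustration):

```python
import random

# Toy stand-in for "train a model with these hyperparameters and
# return its validation score" (a made-up quadratic surface).
def score(params):
    return (1.0
            - (params["learning_rate"] - 0.1) ** 2
            - (params["max_depth"] - 6) ** 2 / 100)

random.seed(1)
best_params, best_score = None, float("-inf")
for trial in range(250):                     # mirrors max_eval=250
    params = {
        "learning_rate": random.uniform(0.01, 0.5),
        "max_depth": random.randint(2, 12),
    }
    s = score(params)
    if s > best_score:                       # greater_is_better=True, as in the log
        best_params, best_score = params, s

print(best_params, round(best_score, 4))
```

Unlike this sketch, hyperopt's TPE sampler uses the scores of earlier trials to propose more promising candidates, which is what makes 250 evaluations go a long way.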