Speeding up XGBoost model training by changing tree_method
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import time

# create synthetic dataset
X, y = make_classification(n_samples=100000, n_features=1000, n_informative=50, n_redundant=0, random_state=1)

# split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=1)

# define the datasets to evaluate each iteration
evalset = [(X_train, y_train), (X_test, y_test)]

results = []
methods = ['exact', 'approx', 'hist', 'gpu_hist', 'auto']
for method in methods:
    model = XGBClassifier(
        learning_rate=0.02,
        n_estimators=50,
        objective="binary:logistic",
        use_label_encoder=False,
        tree_method=method
    )
    start = time.time()
    # fit the model
    model.fit(X_train, y_train, eval_metric='logloss', eval_set=evalset)
    end = time.time()
    results.append(method + " Fit Time: " + str(end - start))

print(results)
Thanks @Jason2Brownlee! I will use time.perf_counter() in my future blog posts.
I love your work, by the way.
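For reference, a minimal sketch of that swap, assuming the same loop structure as the snippet above. time.perf_counter() is a monotonic, high-resolution clock, so it is better suited to measuring elapsed fit time than time.time(); the time_fit helper below is a hypothetical name introduced just for illustration.

import time

# hypothetical helper: times a single model.fit() call with a monotonic clock
def time_fit(model, X, y, **fit_kwargs):
    start = time.perf_counter()
    model.fit(X, y, **fit_kwargs)
    return time.perf_counter() - start

# example usage inside the benchmarking loop:
# elapsed = time_fit(model, X_train, y_train, eval_metric='logloss', eval_set=evalset)
# results.append(method + " Fit Time: " + str(elapsed))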