Created
September 8, 2024 14:14
-
-
Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Environment and dependency setup for the XGBoost-vs-LightGBM benchmark.
import os
import time
import warnings

import neptune
from dotenv import load_dotenv
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

# Populate the process environment from a local .env file so the
# NEPTUNE_* variables below are available.
load_dotenv()

# Suppress library warning chatter during the training sweep.
warnings.filterwarnings('ignore')

# Neptune credentials/project, read from the environment loaded above.
api_token = os.getenv("NEPTUNE_API_TOKEN")
project = os.getenv("NEPTUNE_PROJECT_NAME")
def create_run(name):
    """Initialize and return a Neptune run identified by *name*.

    The target project and API token are read from the environment
    (NEPTUNE_PROJECT_NAME / NEPTUNE_API_TOKEN); *name* is used as the
    run's custom ID so re-runs resume the same experiment.
    """
    return neptune.init_run(
        project=os.getenv("NEPTUNE_PROJECT_NAME"),
        api_token=os.getenv("NEPTUNE_API_TOKEN"),
        custom_run_id=name,
    )
# One Neptune run per model so the two experiments chart side by side.
lgbm_run = create_run('LightGBM')
xgb_run = create_run('XGBoost')

# Sweep configuration: dataset sizes from 1,000 to 20,000 rows in
# increments of 1,000.
min_samples = 1000
max_samples = 20000
step = 1000
# Benchmark sweep: for each dataset size, train XGBoost and LightGBM on the
# same train/test split, then log fit-time and test accuracy to Neptune at
# step=sample_size so both runs share a common x-axis.
for sample_size in tqdm(range(min_samples, max_samples + step, step)):
    # Generate a synthetic classification dataset of the current size.
    # Fix: seed both the data generation and the split — the models were
    # already seeded (random_state=42) but the data was not, making the
    # benchmark non-reproducible across re-runs.
    X, y = make_classification(n_samples=sample_size, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )

    # --- XGBoost: time the fit, score on the held-out 20% ---
    xgb_model = XGBClassifier(random_state=42, verbosity=0)
    start = time.time()
    xgb_model.fit(X_train, y_train)
    xgb_runtime = time.time() - start
    xgb_accuracy = accuracy_score(y_test, xgb_model.predict(X_test))

    # --- LightGBM: identical protocol for a fair comparison ---
    lgbm_model = LGBMClassifier(random_state=42, verbosity=-1)
    start = time.time()
    lgbm_model.fit(X_train, y_train)
    lgbm_runtime = time.time() - start
    lgbm_accuracy = accuracy_score(y_test, lgbm_model.predict(X_test))

    # Log both models' metrics keyed by sample size so the series align.
    lgbm_run["metrics/comparison/runtime"].append(lgbm_runtime, step=sample_size)
    lgbm_run["metrics/comparison/accuracy"].append(lgbm_accuracy, step=sample_size)
    xgb_run["metrics/comparison/accuracy"].append(xgb_accuracy, step=sample_size)
    xgb_run["metrics/comparison/runtime"].append(xgb_runtime, step=sample_size)
# Flush pending metrics and close both Neptune runs now that the sweep is done.
for finished_run in (xgb_run, lgbm_run):
    finished_run.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment