Skip to content

Instantly share code, notes, and snippets.

@BexTuychiev
Created September 8, 2024 14:14
Show Gist options
  • Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.
Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.
import os
import time
import warnings
import neptune
from dotenv import load_dotenv
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
# Load environment configuration first (load_dotenv comes from python-dotenv
# and is expected to populate os.environ from a local .env file), then
# silence every Python warning for the remainder of the script.
load_dotenv()
warnings.filterwarnings('ignore')

# Neptune credentials as seen at import time; either is None when the
# corresponding environment variable is not set.
project = os.environ.get("NEPTUNE_PROJECT_NAME")
api_token = os.environ.get("NEPTUNE_API_TOKEN")
def create_run(name):
    """Start a Neptune run identified by ``name``.

    The project and API token are read from the ``NEPTUNE_PROJECT_NAME`` and
    ``NEPTUNE_API_TOKEN`` environment variables at call time, and ``name`` is
    forwarded to Neptune as the run's ``custom_run_id``.

    Returns:
        Whatever ``neptune.init_run`` returns for those settings.
    """
    credentials = {
        "project": os.getenv("NEPTUNE_PROJECT_NAME"),
        "api_token": os.getenv("NEPTUNE_API_TOKEN"),
    }
    return neptune.init_run(custom_run_id=name, **credentials)
# Creating two separate experiments
lgbm_run = create_run('LightGBM')
xgb_run = create_run('XGBoost')

# Configuration for our custom dataset
min_samples = 1000
max_samples = 20000
step = 1000

# Benchmark sweep: for every dataset size, fit both classifiers on the same
# split, time only the fit() call, score on the held-out 20%, and log the
# results to each model's run using the sample size as the series step.
#
# The try/finally guarantees both Neptune runs are stopped even when a fit or
# a logging call raises partway through the sweep.
#
# NOTE(review): make_classification and train_test_split are called without a
# random_state, so datasets (and therefore accuracies) presumably differ
# between executions even though both models are seeded with 42.
try:
    # "+ step" makes the range inclusive of max_samples.
    for sample_size in tqdm(range(min_samples, max_samples + step, step)):
        # Generating the dataset of custom sample size
        X, y = make_classification(n_samples=sample_size)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, stratify=y
        )

        # XGBoost training
        xgb_model = XGBClassifier(random_state=42, verbosity=0)
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start = time.perf_counter()
        xgb_model.fit(X_train, y_train)
        xgb_runtime = time.perf_counter() - start
        xgb_accuracy = accuracy_score(y_test, xgb_model.predict(X_test))

        # LightGBM training
        lgbm_model = LGBMClassifier(random_state=42, verbosity=-1)
        start = time.perf_counter()
        lgbm_model.fit(X_train, y_train)
        lgbm_runtime = time.perf_counter() - start
        lgbm_accuracy = accuracy_score(y_test, lgbm_model.predict(X_test))

        # Logging
        lgbm_run["metrics/comparison/runtime"].append(lgbm_runtime, step=sample_size)
        lgbm_run["metrics/comparison/accuracy"].append(lgbm_accuracy, step=sample_size)
        xgb_run["metrics/comparison/accuracy"].append(xgb_accuracy, step=sample_size)
        xgb_run["metrics/comparison/runtime"].append(xgb_runtime, step=sample_size)
finally:
    xgb_run.stop()
    lgbm_run.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment