Skip to content

Instantly share code, notes, and snippets.

@RAMitchell
Created May 19, 2020 02:31
Show Gist options
  • Save RAMitchell/e9cdbe773e7efef06082c9b1f65b73b0 to your computer and use it in GitHub Desktop.
Save RAMitchell/e9cdbe773e7efef06082c9b1f65b73b0 to your computer and use it in GitHub Desktop.
Demonstration of memory usage for XGBoost DeviceQuantileDMatrix
import xgboost as xgb
import cupy as cp
import GPUtil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Benchmark script: for each DMatrix flavour, train a small gpu_hist model on
# random CuPy data of two shapes (same element count, different aspect ratio),
# record how much GPU memory the DMatrix + training added, and plot the
# results as a grouped bar chart.

# The "seaborn" style sheet was renamed "seaborn-v0_8" in Matplotlib 3.6 and
# the old name was subsequently removed; pick whichever this install ships.
_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(_style)

dmatrix_type = [xgb.DMatrix, xgb.DeviceQuantileDMatrix]
test_sizes = [(300000, 1000), (30000, 10000)]
bar_index = np.arange(len(test_sizes))
bar_width = 0.35

# Training parameters do not depend on the loop variables, so build them once.
param = {'objective': 'binary:logistic',
         'tree_method': 'gpu_hist'}

for i, DMatrixT in enumerate(dmatrix_type):
    # Memory delta for each entry of test_sizes, in the same order.
    res = []
    for size in test_sizes:
        X = cp.random.random(size, dtype='float32')
        y = cp.random.random(size[0], dtype='float32')
        # Baseline is sampled after X and y exist, so the delta below covers
        # only DMatrix construction and training, not the input arrays.
        mem_before = GPUtil.getGPUs()[0].memoryUsed
        dtrain = DMatrixT(X, label=y)
        bst = xgb.train(param, dtrain)
        usage = GPUtil.getGPUs()[0].memoryUsed - mem_before
        res.append(usage)
        # Drop every reference before the next configuration is measured.
        del dtrain, bst, X, y
    # One bar group per DMatrix type, shifted right by its index; the class
    # name gives a readable legend entry instead of "<class '...'>".
    plt.bar(bar_index + i * bar_width, res, bar_width, label=DMatrixT.__name__)

plt.title("DMatrix device memory usage")
# Centre the tick between the two bars of each group; labels are passed as
# explicit strings rather than raw tuples.
plt.xticks(bar_index + bar_width / 2, [str(size) for size in test_sizes])
plt.legend()
plt.xlabel("Dataset dimension")
# GPUtil reports memory in megabytes; "Mb" would read as megabits.
plt.ylabel("MB")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment