Skip to content

Instantly share code, notes, and snippets.

@Eligijus112
Created March 22, 2021 18:21
Show Gist options
  • Save Eligijus112/64d5a2c22e0a8258fcda9c6d0c7f8692 to your computer and use it in GitHub Desktop.
Save Eligijus112/64d5a2c22e0a8258fcda9c6d0c7f8692 to your computer and use it in GitHub Desktop.
Gist for evaluating XGBoost training speeds for different hyperparameters
# Number of rows sampled from the data for the benchmark
nrows = 30000

# Hyper-parameter values to sweep; every (n_estimators, max_depth) pair
# gets timed once per device.
max_depth = [4, 6, 8, 12, 14, 16]
n_estimators = [30, 100, 200, 300, 600, 900]

# Search space for the CPU runs: no tree_method is set, so the library
# default is used.
cpu_dict = dict(
    objective=['reg:squarederror'],
    n_estimators=n_estimators,
    max_depth=max_depth,
)

# Same search space, but with the 'gpu_hist' tree method requested.
gpu_dict = dict(
    objective=['reg:squarederror'],
    n_estimators=n_estimators,
    max_depth=max_depth,
    tree_method=['gpu_hist'],
)
# Expand each search space into an iterable of parameter combinations
gpu_hp, cpu_hp = ParameterGrid(gpu_dict), ParameterGrid(cpu_dict)

# Draw the random sample of rows that the benchmarks train on
subset = d.sample(n=nrows)
# Benchmark GPU training: time one fit per hyper-parameter combination.
# The design matrix and target do not depend on the hyper-parameters, so
# they are built once up front instead of on every iteration.
X = pd.get_dummies(subset[features])
Y = subset['Sales']

gpu_speeds = []
for params in gpu_hp:
    # Fresh, untrained model configured for this grid point
    gpu = xgb.XGBRegressor(**params)

    # Training on GPU; only the fit call is timed. perf_counter is a
    # monotonic, high-resolution clock, so the measurement is not affected
    # by system clock adjustments.
    start = time.perf_counter()
    gpu.fit(X, Y)
    gpu_speed = time.perf_counter() - start

    gpu_speeds.append(gpu_speed)
# Benchmark CPU training: time one fit per hyper-parameter combination.
# The design matrix and target do not depend on the hyper-parameters, so
# they are built once up front instead of on every iteration.
X = pd.get_dummies(subset[features])
Y = subset['Sales']

cpu_speeds = []
for params in cpu_hp:
    # Fresh, untrained model configured for this grid point
    cpu = xgb.XGBRegressor(**params)

    # Training on CPU; only the fit call is timed. perf_counter is a
    # monotonic, high-resolution clock, so the measurement is not affected
    # by system clock adjustments.
    start = time.perf_counter()
    cpu.fit(X, Y)
    cpu_speed = time.perf_counter() - start

    cpu_speeds.append(cpu_speed)
# Collect the timings into one table with one row per grid point.
# NOTE(review): pairing cpu_speeds with gpu_speeds row by row assumes
# gpu_hp was iterated in the same order as cpu_hp - confirm.
grid_points = list(cpu_hp)
speeds = pd.DataFrame({
    'n_estimators': [point.get('n_estimators') for point in grid_points],
    'max_depth': [point.get('max_depth') for point in grid_points],
    'cpu_speed': cpu_speeds,
    'gpu_speed': gpu_speeds,
})

# Seconds saved by the GPU run; positive values mean the GPU fit was faster
speeds['diff'] = speeds['cpu_speed'].sub(speeds['gpu_speed'])
# Coordinates for the surface plot. With meshgrid's default 'xy' indexing
# both X and Y have shape (len(max_depth), len(n_estimators)).
X, Y = np.meshgrid(n_estimators, max_depth)

# Z must have exactly the shape of X and Y. Reshaping to
# (len(n_estimators), len(max_depth)) only worked because both lists
# happen to have the same length; with unequal lengths the plot would fail
# or be scrambled.
# NOTE: this relies on the rows of `speeds` being ordered with max_depth
# varying slowest and n_estimators varying fastest, which is how
# ParameterGrid iterates (keys sorted, last key changing fastest) - confirm
# if the grid construction ever changes.
Z = speeds['diff'].values.reshape(X.shape)

# Plotting the CPU-minus-GPU training time as a 3D surface
fig = plt.figure(figsize=(12, 12))
ax = plt.axes(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, edgecolor='none', alpha=0.7)
ax.set_xlabel('n_estimators')
ax.set_ylabel('max_depth')
ax.set_zlabel('GPU speed advantage (s)')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment