Created
March 22, 2021 18:21
-
-
Save Eligijus112/64d5a2c22e0a8258fcda9c6d0c7f8692 to your computer and use it in GitHub Desktop.
Gist for evaluating XGBoost training speed (CPU vs. GPU) across different hyperparameters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyper parameter values that span the benchmark grid
n_estimators = [30, 100, 200, 300, 600, 900]
max_depth = [4, 6, 8, 12, 14, 16]

# Number of rows drawn for the benchmark sample
nrows = 30000

# The CPU grid holds only the settings shared by both runs ...
cpu_dict = dict(
    objective=['reg:squarederror'],
    n_estimators=n_estimators,
    max_depth=max_depth,
)

# ... and the GPU grid is the same grid plus the 'gpu_hist' tree method
gpu_dict = dict(cpu_dict, tree_method=['gpu_hist'])

# Expanding both dictionaries into grids of hyper parameter combinations
gpu_hp, cpu_hp = ParameterGrid(gpu_dict), ParameterGrid(cpu_dict)

# Drawing the random subset of data that the models are fitted on
subset = d.sample(nrows)
# Timing one fit per hyper parameter combination of the GPU grid.
# The feature matrix and the target do not depend on the hyper parameters,
# so they are created once instead of on every iteration.
X = pd.get_dummies(subset[features])
Y = subset['Sales']

# NOTE(review): the first fit may include one-off GPU start-up cost;
# consider a warm-up fit before the loop if that matters - confirm.
gpu_speeds = []
for params in gpu_hp:
    # Initiating the model object
    gpu = xgb.XGBRegressor(**params)

    # Training on gpu. perf_counter is a monotonic, high-resolution clock,
    # so the measurement is not disturbed by system clock adjustments.
    start = time.perf_counter()
    gpu.fit(X, Y)
    gpu_speed = time.perf_counter() - start

    gpu_speeds.append(gpu_speed)
# Timing one fit per hyper parameter combination of the CPU grid.
# The feature matrix and the target do not depend on the hyper parameters,
# so they are created once instead of on every iteration.
X = pd.get_dummies(subset[features])
Y = subset['Sales']

cpu_speeds = []
for params in cpu_hp:
    # Initiating the model object (this grid sets no tree_method, so the
    # xgboost default is used)
    cpu = xgb.XGBRegressor(**params)

    # Training on cpu. perf_counter is a monotonic, high-resolution clock,
    # so the measurement is not disturbed by system clock adjustments.
    start = time.perf_counter()
    cpu.fit(X, Y)
    cpu_speed = time.perf_counter() - start

    cpu_speeds.append(cpu_speed)
# Collecting the timings in one frame, one row per hyper parameter combination.
# The grid is materialised a single time and reused for both parameter columns.
grid_points = list(cpu_hp)
speed_columns = {
    'n_estimators': [point.get('n_estimators') for point in grid_points],
    'max_depth': [point.get('max_depth') for point in grid_points],
    'cpu_speed': cpu_speeds,
    'gpu_speed': gpu_speeds,
}
speeds = pd.DataFrame(speed_columns)

# Seconds saved by the GPU run: positive values mean the GPU fit was faster
speeds['diff'] = speeds['cpu_speed'] - speeds['gpu_speed']
# Creating the coordinates for the surface plot.
# np.meshgrid(n_estimators, max_depth) returns arrays of shape
# (len(max_depth), len(n_estimators)). ParameterGrid iterates over its sorted
# keys with the last key changing fastest, so the timings come as one run of
# n_estimators per max_depth value. Z must therefore have max_depth on the rows
# and n_estimators on the columns; the reversed shape only works when both
# lists happen to have the same length.
X, Y = np.meshgrid(n_estimators, max_depth)
Z = speeds['diff'].to_numpy().reshape((len(max_depth), len(n_estimators)))

# Plotting the CPU - GPU time difference over the hyper parameter grid
fig = plt.figure(figsize=(12, 12))
ax = plt.axes(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, edgecolor='none', alpha=0.7)
ax.set_xlabel('n_estimators')
ax.set_ylabel('max_depth')
ax.set_zlabel('GPU speed advantage (s)')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment