Eligijus112 · March 22, 2021 18:21
diff --git a/hp-speed-xgoobst.py b/hp-speed-xgoobst.py
 # Hyper-parameter values that span the benchmark grid
 n_estimators = [30, 100, 200, 300, 600, 900]
 max_depth = [4, 6, 8, 12, 14, 16]

 # Size of the random sample every model is fitted on
 nrows = 30000

 # Search spaces: both share objective / n_estimators / max_depth; the GPU
 # one additionally pins tree_method to 'gpu_hist'.
 # NOTE(review): newer XGBoost releases reportedly deprecate 'gpu_hist' in
 # favour of tree_method='hist' with device='cuda' - confirm against the
 # installed version before changing it.
 cpu_dict = dict(
     objective=['reg:squarederror'],
     n_estimators=n_estimators,
     max_depth=max_depth,
 )
 gpu_dict = dict(
     objective=['reg:squarederror'],
     n_estimators=n_estimators,
     max_depth=max_depth,
     tree_method=['gpu_hist'],
 )

 # Expand each dictionary into the full set of parameter combinations
 gpu_hp = ParameterGrid(gpu_dict)
 cpu_hp = ParameterGrid(cpu_dict)

 # Draw the random rows shared by all of the timing runs
 subset = d.sample(nrows)

 # Creating X and Y once, outside the loop: they depend only on the sampled
 # rows, not on the hyper-parameters being timed
 X = pd.get_dummies(subset[features])
 Y = subset['Sales']

 # Iterating through all the GPU hyper parameters and timing one fit each
 # NOTE(review): the first GPU fit may also include one-off device start-up
 # cost - consider a warm-up fit if that matters for the comparison.
 gpu_speeds = []
 for params in gpu_hp:
     # Initiating the model object
     gpu = xgb.XGBRegressor(**params)

     # Training on GPU; perf_counter is the clock intended for measuring
     # elapsed intervals, so only the fit call is inside the timed region
     start = time.perf_counter()
     gpu.fit(X, Y)
     gpu_speed = time.perf_counter() - start

     gpu_speeds.append(gpu_speed)
    
 # Creating X and Y once, outside the loop: they depend only on the sampled
 # rows, not on the hyper-parameters being timed
 X = pd.get_dummies(subset[features])
 Y = subset['Sales']

 # Iterating through all the CPU hyper parameters and timing one fit each
 cpu_speeds = []
 for params in cpu_hp:
     # Initiating the model object
     cpu = xgb.XGBRegressor(**params)

     # Training on CPU; perf_counter is the clock intended for measuring
     # elapsed intervals, so only the fit call is inside the timed region
     start = time.perf_counter()
     cpu.fit(X, Y)
     cpu_speed = time.perf_counter() - start

     cpu_speeds.append(cpu_speed)
    
 # Put each timing next to the grid point it was measured for; the rows
 # follow the iteration order of cpu_hp, and gpu_speeds is matched to it
 # by position
 grid_points = list(cpu_hp)
 speeds = pd.DataFrame({
     'n_estimators': [point.get('n_estimators') for point in grid_points],
     'max_depth': [point.get('max_depth') for point in grid_points],
     'cpu_speed': cpu_speeds,
     'gpu_speed': gpu_speeds,
 })

 # CPU time minus GPU time: positive values mean the GPU fit was faster
 speeds['diff'] = speeds['cpu_speed'].sub(speeds['gpu_speed'])

 # Creating the coordinates for surface plot. With its default indexing,
 # meshgrid returns arrays of shape (len(max_depth), len(n_estimators)):
 # rows follow max_depth, columns follow n_estimators.
 X, Y = np.meshgrid(n_estimators, max_depth)

 # Lay the differences out on that same (max_depth, n_estimators) layout by
 # looking every value up through its own grid coordinates. This keeps Z
 # aligned with X and Y even when the two parameter lists differ in length
 # or the rows of `speeds` arrive in another order.
 Z = (
     speeds
     .pivot(index='max_depth', columns='n_estimators', values='diff')
     .reindex(index=max_depth, columns=n_estimators)
     .values
 )

 # Plotting the CPU-minus-GPU time difference as a 3D surface
 fig = plt.figure(figsize=(12, 12))
 ax = plt.axes(projection='3d')
 ax.plot_surface(X, Y, Z, rstride=1, cstride=1, edgecolor='none', alpha=0.7)
 ax.set_xlabel('n_estimators')
 ax.set_ylabel('max_depth')
 ax.set_zlabel('GPU speed advantage (s)')
 plt.show()
	# Hyper-parameter values that span the benchmark grid
	n_estimators = [30, 100, 200, 300, 600, 900]
	max_depth = [4, 6, 8, 12, 14, 16]

	# Size of the random sample every model is fitted on
	nrows = 30000

	# Search spaces: both share objective / n_estimators / max_depth; the GPU
	# one additionally pins tree_method to 'gpu_hist'.
	# NOTE(review): newer XGBoost releases reportedly deprecate 'gpu_hist' in
	# favour of tree_method='hist' with device='cuda' - confirm against the
	# installed version before changing it.
	cpu_dict = dict(
		objective=['reg:squarederror'],
		n_estimators=n_estimators,
		max_depth=max_depth,
	)
	gpu_dict = dict(
		objective=['reg:squarederror'],
		n_estimators=n_estimators,
		max_depth=max_depth,
		tree_method=['gpu_hist'],
	)

	# Expand each dictionary into the full set of parameter combinations
	gpu_hp = ParameterGrid(gpu_dict)
	cpu_hp = ParameterGrid(cpu_dict)

	# Draw the random rows shared by all of the timing runs
	subset = d.sample(nrows)

	# Creating X and Y once, outside the loop: they depend only on the sampled
	# rows, not on the hyper-parameters being timed
	X = pd.get_dummies(subset[features])
	Y = subset['Sales']

	# Iterating through all the GPU hyper parameters and timing one fit each
	# NOTE(review): the first GPU fit may also include one-off device start-up
	# cost - consider a warm-up fit if that matters for the comparison.
	gpu_speeds = []
	for params in gpu_hp:
		# Initiating the model object
		gpu = xgb.XGBRegressor(**params)

		# Training on GPU; perf_counter is the clock intended for measuring
		# elapsed intervals, so only the fit call is inside the timed region
		start = time.perf_counter()
		gpu.fit(X, Y)
		gpu_speed = time.perf_counter() - start

		gpu_speeds.append(gpu_speed)

	# Creating X and Y once, outside the loop: they depend only on the sampled
	# rows, not on the hyper-parameters being timed
	X = pd.get_dummies(subset[features])
	Y = subset['Sales']

	# Iterating through all the CPU hyper parameters and timing one fit each
	cpu_speeds = []
	for params in cpu_hp:
		# Initiating the model object
		cpu = xgb.XGBRegressor(**params)

		# Training on CPU; perf_counter is the clock intended for measuring
		# elapsed intervals, so only the fit call is inside the timed region
		start = time.perf_counter()
		cpu.fit(X, Y)
		cpu_speed = time.perf_counter() - start

		cpu_speeds.append(cpu_speed)

	# Put each timing next to the grid point it was measured for; the rows
	# follow the iteration order of cpu_hp, and gpu_speeds is matched to it
	# by position
	grid_points = list(cpu_hp)
	speeds = pd.DataFrame({
		'n_estimators': [point.get('n_estimators') for point in grid_points],
		'max_depth': [point.get('max_depth') for point in grid_points],
		'cpu_speed': cpu_speeds,
		'gpu_speed': gpu_speeds,
	})

	# CPU time minus GPU time: positive values mean the GPU fit was faster
	speeds['diff'] = speeds['cpu_speed'].sub(speeds['gpu_speed'])

	# Creating the coordinates for surface plot. With its default indexing,
	# meshgrid returns arrays of shape (len(max_depth), len(n_estimators)):
	# rows follow max_depth, columns follow n_estimators.
	X, Y = np.meshgrid(n_estimators, max_depth)

	# Lay the differences out on that same (max_depth, n_estimators) layout by
	# looking every value up through its own grid coordinates. This keeps Z
	# aligned with X and Y even when the two parameter lists differ in length
	# or the rows of `speeds` arrive in another order.
	Z = (
		speeds
		.pivot(index='max_depth', columns='n_estimators', values='diff')
		.reindex(index=max_depth, columns=n_estimators)
		.values
	)

	# Plotting the CPU-minus-GPU time difference as a 3D surface
	fig = plt.figure(figsize=(12, 12))
	ax = plt.axes(projection='3d')
	ax.plot_surface(X, Y, Z, rstride=1, cstride=1, edgecolor='none', alpha=0.7)
	ax.set_xlabel('n_estimators')
	ax.set_ylabel('max_depth')
	ax.set_zlabel('GPU speed advantage (s)')
	plt.show()