"""Tester script for GPyTorch using analytic sine functions.""" | |
import torch | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import time | |
from sklearn.metrics import mean_squared_error as mse | |
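
# ---------------------------------------------------------------------------
# NOTE (assumption): The original gist defines BatchedGP and
# CompositeBatchedGP in a companion module that is not shown here. The
# classes below are a minimal sketch of what they could look like, assuming
# the standard GPyTorch batched ExactGP pattern: one independent GP per
# batch element, a constant mean, and a scaled RBF kernel (for the composite
# variant, a product of RBF kernels over the first `ds` "state" dimensions
# and the remaining "action" dimensions). The author's actual
# implementations may differ.
# ---------------------------------------------------------------------------
from gpytorch.distributions import MultivariateNormal
from gpytorch.kernels import RBFKernel, ScaleKernel
from gpytorch.means import ConstantMean
from gpytorch.models import ExactGP


class BatchedGP(ExactGP):
    """Batched exact GP: one independent scalar-output GP per batch element."""

    def __init__(self, train_x, train_y, likelihood, shape, output_device,
                 use_ard=False):
        # output_device is accepted for signature compatibility with the
        # caller; device placement itself is handled outside this class.
        super().__init__(train_x, train_y, likelihood)
        batch_shape = torch.Size([shape])
        ard_num_dims = train_x.shape[-1] if use_ard else None
        self.mean_module = ConstantMean(batch_shape=batch_shape)
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=batch_shape, ard_num_dims=ard_num_dims),
            batch_shape=batch_shape)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)


class CompositeBatchedGP(ExactGP):
    """Batched exact GP with a product kernel k(z, z') = k_s(s, s') * k_a(a, a'),
    where s is the first ds input dimensions (state) and a is the rest (action)."""

    def __init__(self, train_x, train_y, likelihood, shape, output_device,
                 use_ard=False, ds=None):
        super().__init__(train_x, train_y, likelihood)
        d = train_x.shape[-1]
        batch_shape = torch.Size([shape])
        self.mean_module = ConstantMean(batch_shape=batch_shape)
        state_kernel = RBFKernel(batch_shape=batch_shape,
                                 active_dims=tuple(range(ds)),
                                 ard_num_dims=ds if use_ard else None)
        action_kernel = RBFKernel(batch_shape=batch_shape,
                                  active_dims=tuple(range(ds, d)),
                                  ard_num_dims=(d - ds) if use_ard else None)
        self.covar_module = ScaleKernel(state_kernel * action_kernel,
                                        batch_shape=batch_shape)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)
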
def main():
    """Main tester function."""
    # Set parameters
    B = 256  # Number of batches
    N = 100  # Number of data points in each batch
    D = 3  # Dimension of X and Y data
    Ds = 1  # Dimensions for first factored kernel - only needed if factored kernel is used
    EPOCHS = 50  # Number of iterations to perform optimization
    THR = -1e5  # Training threshold (minimum loss at which to stop early)
    USE_CUDA = torch.cuda.is_available()  # Hardware acceleration
    MEAN = 0  # Mean of data generated
    SCALE = 1  # Standard deviation of data generated
    COMPOSITE_KERNEL = False  # Use a factored kernel
    USE_ARD = True  # Use Automatic Relevance Determination in kernel
    LR = 0.5  # Learning rate

    # Create training data and labels
    train_x_np = np.random.normal(loc=MEAN, scale=SCALE, size=(B, N, D))  # Randomly-generated data
    train_y_np = np.sin(train_x_np)  # Analytic sine function
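    # Since np.sin is applied elementwise, train_y_np has the same shape as
    # train_x_np, i.e. (B, N, D), and each output dimension depends only on
    # the corresponding input dimension.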
    # GPyTorch training
    start = time.time()
    model, likelihood = train_gp_batched_scalar(train_x_np, train_y_np,
                                                use_cuda=USE_CUDA,
                                                composite_kernel=COMPOSITE_KERNEL,
                                                epochs=EPOCHS, lr=LR, thr=THR, ds=Ds,
                                                use_ard=USE_ARD)
    end = time.time()
    print("TRAINING TIME: {}".format(end - start))

    # Switch to posterior-predictive mode
    model.eval()
    likelihood.eval()

    # Now evaluate
    test_x_np = np.random.normal(loc=MEAN, scale=SCALE, size=(B, 1, D))
    train_x = preprocess_eval_inputs(train_x_np, train_y_np.shape[-1])
    test_x = preprocess_eval_inputs(test_x_np, train_y_np.shape[-1])

    # Put on GPU
    if USE_CUDA:
        test_x = test_x.cuda()
        train_x = train_x.cuda()

    # Run inference
    with torch.no_grad():
        f_preds = model(test_x)  # Latent function posterior
        observed_pred = likelihood(f_preds)  # Posterior predictive (adds observation noise)
        mean = observed_pred.mean
        variance = observed_pred.variance
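    # NOTE (assumption): GPyTorch's documented inference idiom additionally
    # wraps this block in gpytorch.settings.fast_pred_var() to accelerate
    # predictive-variance computation; it is omitted here to keep the
    # original script's behavior unchanged.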
    # Generate plots over different dimensions
    for DIM, c in zip([0, 1, 2], ["r", "g", "b"]):
        # Compute mean and unstack
        output = mean.cpu().detach().numpy()
        out_y = np.squeeze(np.array([output[i::B] for i in range(B)]))

        # Reformat, get analytic y, and plot
        x_plot = np.squeeze(test_x_np)
        y_plot_gt = np.sin(x_plot)
        plt.scatter(x_plot[:, DIM], out_y[:, DIM], color=c)
        plt.title("Test X vs. Predicted Y, Dimension {}".format(DIM))
        plt.show()

        # Create color maps
        my_cmap_analytic = plt.get_cmap('hot')
        my_cmap_predicted = plt.get_cmap('cool')

        # Plot ground truth
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(x_plot[..., 0], x_plot[..., 1], y_plot_gt[..., DIM],
                        antialiased=True, cmap=my_cmap_analytic)
        plt.title("Analytic, Ground Truth Surface, Dimension {}".format(DIM))
        plt.show()

        # Plot the predicted values
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(x_plot[..., 0], x_plot[..., 1], out_y[..., DIM],
                        antialiased=True, cmap=my_cmap_predicted)
        plt.title("Predicted Surface, Dimension {}".format(DIM))
        plt.show()

    # Compute average RMSE
    y_true = np.squeeze(np.sin(test_x_np))
    y_pred = out_y
    rmse = np.sqrt(mse(y_true, y_pred))
    print("RMSE: {}".format(rmse))
    return rmse

def train_gp_batched_scalar(Zs, Ys, use_cuda=False, epochs=10,
                            lr=0.1, thr=0, use_ard=False, composite_kernel=False,
                            ds=None, global_hyperparams=False,
                            model_hyperparams=None):
    """Computes a Gaussian Process object using GPyTorch. Each outcome is
    modeled as a single scalar outcome.

    Parameters:
        Zs (np.array): Array of inputs of expanded shape (B, N, XD), where B is
            the size of the minibatch, N is the number of data points in each
            GP (the number of neighbors we consider in IER), and XD is the
            dimensionality of the state-action space of the environment.
        Ys (np.array): Array of predicted values of shape (B, N, YD), where B is
            the size of the minibatch, N is the number of data points in each
            GP (the number of neighbors we consider in IER), and YD is the
            dimensionality of the state-reward space of the environment.
        use_cuda (bool): Whether to use CUDA for GPU acceleration with PyTorch
            during the optimization step. Defaults to False.
        epochs (int): The number of epochs to train the batched GPs over.
            Defaults to 10.
        lr (float): The learning rate to use for the Adam optimizer to train
            the batched GPs.
        thr (float): The marginal log-likelihood loss threshold at which to
            stop training early. Defaults to 0.
        use_ard (bool): Whether to use Automatic Relevance Determination (ARD)
            for the lengthscale parameter, i.e. a weighting for each input
            dimension. Defaults to False.
        composite_kernel (bool): Whether to use a composite kernel that computes
            the product between states and actions to compute the variance of y.
        ds (int): If using a composite kernel, ds specifies the dimensionality of
            the state. Only applicable if composite_kernel is True.
        global_hyperparams (bool): Whether to use a single set of hyperparameters
            over an entire model. Defaults to False.
        model_hyperparams (dict): A dictionary of hyperparameters to use for
            initializing a model. Defaults to None.

    Returns:
        model (BatchedGP): A GPR model of BatchedGP type with which to generate
            synthetic predictions of rewards and next states.
        likelihood (GaussianLikelihood): A likelihood object used for training
            and predicting samples with the BatchedGP model.
    """
    # Preprocess batch data
    B, N, XD = Zs.shape
    YD = Ys.shape[-1]
    batch_shape = B * YD

    if use_cuda:  # If GPU available
        output_device = torch.device('cuda:0')  # GPU
    else:
        output_device = torch.device('cpu')  # Fall back to CPU

    # Format the training features - tile and reshape
    train_x = torch.tensor(Zs, device=output_device)
    train_x = train_x.repeat((YD, 1, 1))

    # Format the training labels - reshape
    train_y = torch.vstack(
        [torch.tensor(Ys, device=output_device)[..., i] for i in range(YD)])
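    # Illustrative shape check (values assume main()'s defaults, B=256, N=100,
    # XD=YD=3): tiling yields train_x of shape (B * YD, N, XD) = (768, 100, 3)
    # and train_y of shape (B * YD, N) = (768, 100), i.e. one scalar-output GP
    # per (batch element, output dimension) pair.
    assert train_x.shape == (B * YD, N, XD)
    assert train_y.shape == (B * YD, N)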
    # Initialize likelihood and model
    likelihood = GaussianLikelihood(batch_shape=torch.Size([batch_shape]))

    # Determine which type of kernel to use
    if composite_kernel:
        model = CompositeBatchedGP(train_x, train_y, likelihood, batch_shape,
                                   output_device, use_ard=use_ard, ds=ds)
    else:
        model = BatchedGP(train_x, train_y, likelihood, batch_shape,
                          output_device, use_ard=use_ard)

    # Initialize the model with hyperparameters
    if model_hyperparams is not None:
        model.initialize(**model_hyperparams)

    # Determine if we need to optimize hyperparameters
    if global_hyperparams:
        if use_cuda:  # Send everything to GPU for training
            model = model.cuda().eval()
            torch.cuda.empty_cache()  # Empty the cache from GPU
        gc.collect()  # NOTE: Critical to avoid GPU leak
        del train_x, train_y, Zs, Ys, likelihood
        return model, model.likelihood
    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the Adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    if use_cuda:  # Send everything to GPU for training
        model = model.cuda()
        likelihood = likelihood.cuda()
        train_x = train_x.cuda()
        train_y = train_y.cuda()
        mll = mll.cuda()
    def epoch_train(j):
        """Helper function for running training in the optimization loop. Note
        that the model and likelihood are updated outside of this function as well.

        Parameters:
            j (int): The epoch number.

        Returns:
            item_loss (float): The numeric representation (detached from the
                computation graph) of the loss from the jth epoch.
        """
        optimizer.zero_grad()  # Zero gradients
        output = model(train_x)  # Forward pass
        loss = -mll(output, train_y).sum()  # Compute ind. losses + aggregate
        loss.backward()  # Backpropagate gradients
        item_loss = loss.item()  # Extract loss (detached from comp. graph)
        optimizer.step()  # Update weights
        optimizer.zero_grad()  # Zero gradients
        gc.collect()  # NOTE: Critical to avoid GPU leak
        return item_loss
    # Run the optimization loop
    for i in range(epochs):
        loss_i = epoch_train(i)
        if i % 10 == 0:
            print("LOSS EPOCH {}: {}".format(i, loss_i))
        if loss_i < thr:  # If we reach a certain loss threshold, stop training
            break

    if use_cuda:
        torch.cuda.empty_cache()  # Empty the cache from GPU
    return model, likelihood

def preprocess_eval_inputs(Zs, d_y, device="cpu"):
    """Helper function to preprocess inputs for use with training
    targets and evaluation.

    Parameters:
        Zs (np.array): Array of inputs of expanded shape (B, N, XD), where B is
            the size of the minibatch, N is the number of data points in each
            GP (the number of neighbors we consider in IER), and XD is the
            dimensionality of the state-action space of the environment.
        d_y (int): The dimensionality of the targets of GPR.
        device (str): Device to output the tensor to.

    Returns:
        eval_x (torch.tensor): Torch tensor of shape (B * YD, N, XD). This
            tensor, corresponding to a tiled set of inputs, is used as input
            for the inference model in FP64 format (matching the dtype of the
            training tensors).
    """
    # Preprocess batch data - cast to double and tile across output dimensions
    eval_x = torch.tensor(Zs, device=device).double()
    eval_x = eval_x.repeat((d_y, 1, 1))
    return eval_x
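
# Illustrative usage (shapes assume main()'s defaults): passing test inputs of
# shape (B, 1, XD) = (256, 1, 3) with d_y = 3 yields eval_x of shape
# (d_y * B, 1, XD) = (768, 1, 3), matching the (B * YD)-sized batch dimension
# of the trained model.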

if __name__ == '__main__':
    all_rmses = []
    for i in range(10):
        all_rmses.append(main())
    print("AVERAGED RMSE: {}".format(np.mean(all_rmses)))