Skip to content

Instantly share code, notes, and snippets.

@pythonlessons
Last active November 10, 2020 20:17
Show Gist options
  • Select an option

  • Save pythonlessons/5f2d9c6ff30da19813e47d6f14bc41f6 to your computer and use it in GitHub Desktop.

Select an option

Save pythonlessons/5f2d9c6ff30da19813e47d6f14bc41f6 to your computer and use it in GitHub Desktop.
LunarLander-v2_critic
class Critic_Model:
    """Critic (state-value) network trained with a clipped value loss.

    A state is passed through three ReLU ``Dense`` layers (512, 256 and 64
    units, ``he_uniform`` initialised) and a final single-unit linear layer.
    The model has a second input, ``old_values``, which is not connected to
    that output; it is only referenced by the loss built in
    ``critic_PPO2_loss``.

    Attributes:
        Critic: The compiled Keras ``Model`` taking ``[state, old_values]``
            and returning the value output.
    """

    # Default half-width of the interval the value update is clipped to.
    DEFAULT_LOSS_CLIPPING = 0.2

    def __init__(self, input_shape, action_space, lr, optimizer,
                 loss_clipping=DEFAULT_LOSS_CLIPPING):
        """Build and compile the critic network.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer for states.
            action_space: Accepted but not used by this class.
            lr: Learning rate forwarded to ``optimizer``.
            optimizer: Callable producing the optimizer; it is invoked as
                ``optimizer(lr=lr)``.
            loss_clipping: Clip range passed on to ``critic_PPO2_loss``.
                Defaults to 0.2, the value that used to be hard-coded.
        """
        X_input = Input(input_shape)
        # Extra input that exists only so the loss can see the old estimates.
        old_values = Input(shape=(1,))

        V = Dense(512, activation="relu", kernel_initializer='he_uniform')(X_input)
        V = Dense(256, activation="relu", kernel_initializer='he_uniform')(V)
        V = Dense(64, activation="relu", kernel_initializer='he_uniform')(V)
        value = Dense(1, activation=None)(V)

        self.Critic = Model(inputs=[X_input, old_values], outputs=value)
        # NOTE(review): recent Keras releases spell this keyword
        # `learning_rate`; confirm against the installed version.
        self.Critic.compile(
            loss=[self.critic_PPO2_loss(old_values, loss_clipping)],
            optimizer=optimizer(lr=lr),
        )

    def critic_PPO2_loss(self, values, loss_clipping=DEFAULT_LOSS_CLIPPING):
        """Return a Keras loss function that clips the value update.

        Args:
            values: Reference value estimates the new prediction is clipped
                around (``__init__`` passes the ``old_values`` input tensor).
            loss_clipping: Maximum distance the clipped prediction may move
                away from ``values`` in either direction.

        Returns:
            A function ``loss(y_true, y_pred)`` that returns half the mean of
            the element-wise maximum of the clipped and unclipped squared
            errors.
        """
        def loss(y_true, y_pred):
            # Prediction restricted to within +/- loss_clipping of `values`.
            clipped_pred = values + K.clip(y_pred - values, -loss_clipping, loss_clipping)
            clipped_error = (y_true - clipped_pred) ** 2
            unclipped_error = (y_true - y_pred) ** 2
            # The larger of the two errors is used per element.  The plain,
            # unclipped alternative would be the mean of `unclipped_error`.
            return 0.5 * K.mean(K.maximum(clipped_error, unclipped_error))
        return loss

    def predict(self, state):
        """Return the model's value output for a batch of states.

        The ``old_values`` input does not feed the value output, so a
        zero-filled placeholder with one row per state is supplied for it.

        Args:
            state: Array of states; ``state.shape[0]`` is used as the number
                of rows of the placeholder.
        """
        placeholder = np.zeros((state.shape[0], 1))
        return self.Critic.predict([state, placeholder])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment