Skip to content

Instantly share code, notes, and snippets.

@pythonlessons
Created January 14, 2020 14:44
Show Gist options
  • Save pythonlessons/6b2bbf425c93702ba5199cac58868e9f to your computer and use it in GitHub Desktop.
Save pythonlessons/6b2bbf425c93702ba5199cac58868e9f to your computer and use it in GitHub Desktop.
03_CartPole-reinforcement-learning_Dueling_DDQN
def OurModel(input_shape, action_space, dueling):
    """Build and compile the Q-value network, optionally with a dueling head.

    The network is a stack of three fully connected ReLU layers
    (512 -> 256 -> 64 units) followed by one of two output heads:

    * ``dueling`` truthy: a scalar state-value stream V(s) and an
      ``action_space``-wide advantage stream A(s, a) are combined as
      ``Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))``.
    * ``dueling`` falsy: a single linear ``Dense(action_space)`` layer.

    Args:
        input_shape: Shape passed to ``Input`` (per-sample shape, no batch
            dimension).
        action_space: Number of output units, i.e. one Q-value per action.
        dueling: Selects the dueling head when truthy, the plain head
            otherwise.

    Returns:
        The compiled ``Model`` (MSE loss, RMSprop optimizer). As a side
        effect the model summary is printed.
    """
    X_input = Input(input_shape)

    # Shared trunk. The input shape is already fixed by `Input`, so the
    # Dense layers do not need an `input_shape` argument of their own.
    X = Dense(512, activation="relu", kernel_initializer='he_uniform')(X_input)
    X = Dense(256, activation="relu", kernel_initializer='he_uniform')(X)
    X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

    if dueling:
        # State-value stream V(s): one scalar per sample, kept as a
        # (batch, 1) tensor so that Add can broadcast it over the actions.
        state_value = Dense(1, kernel_initializer='he_uniform')(X)
        state_value = Lambda(
            lambda s: K.expand_dims(s[:, 0], -1),
            output_shape=(1,),
        )(state_value)

        # Advantage stream A(s, a), centred per sample. The mean must be
        # taken over the action axis only (axis=1); without an axis it is
        # computed over the whole batch, which couples the Q-values of
        # unrelated samples to each other.
        action_advantage = Dense(action_space, kernel_initializer='he_uniform')(X)
        action_advantage = Lambda(
            lambda a: a - K.mean(a, axis=1, keepdims=True),
            output_shape=(action_space,),
        )(action_advantage)

        # Q(s, a) = V(s) + centred A(s, a)
        X = Add()([state_value, action_advantage])
    else:
        # Plain head: one linear output per action.
        X = Dense(action_space, activation="linear", kernel_initializer='he_uniform')(X)

    # NOTE(review): some TensorFlow releases may reject model names that
    # contain spaces when they are used as scope names - confirm against the
    # installed version before relying on this name.
    model = Model(inputs=X_input, outputs=X, name='CartPole Dueling DDQN model')
    model.compile(
        loss="mean_squared_error",
        optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01),
        metrics=["accuracy"],
    )
    model.summary()
    return model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment