Q-network used for reinforcement learning
from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input, Lambda
from keras.optimizers import Adam
import keras.backend as K
class Qnetwork():
    def __init__(self, final_layer_size, input_shape, num_actions):
        # The input image of the game is 84 x 84 x 3 (RGB)
        self.inputs = Input(shape=input_shape, name="main_input")
        # There will be four layers of convolutions performed on the image input
        # A convolution takes a portion of an input and matrix multiplies
        # a filter against that portion to get a new output (see below)
        self.model = Conv2D(
            filters=32,
            kernel_size=[8, 8],
            strides=[4, 4],
            activation="relu",
            padding="valid",
            name="conv1")(self.inputs)
        self.model = Conv2D(
            filters=64,
            kernel_size=[4, 4],
            strides=[2, 2],
            activation="relu",
            padding="valid",
            name="conv2")(self.model)
        self.model = Conv2D(
            filters=64,
            kernel_size=[3, 3],
            strides=[1, 1],
            activation="relu",
            padding="valid",
            name="conv3")(self.model)
        self.model = Conv2D(
            filters=final_layer_size,
            kernel_size=[7, 7],
            strides=[1, 1],
            activation="relu",
            padding="valid",
            name="conv4")(self.model)
        # We then separate the final convolution layer into an advantage and value
        # stream. The value function is how well off you are in a given state.
        # The advantage is how much better off you are after making a particular
        # move. Q is the value function of a state after a given action.
        # Advantage(state, action) = Q(state, action) - Value(state)
        self.stream_AC = Lambda(lambda layer: layer[:, :, :, :final_layer_size // 2], name="advantage")(self.model)
        self.stream_VC = Lambda(lambda layer: layer[:, :, :, final_layer_size // 2:], name="value")(self.model)
        # We then flatten the advantage and value streams
        self.stream_AC = Flatten(name="advantage_flatten")(self.stream_AC)
        self.stream_VC = Flatten(name="value_flatten")(self.stream_VC)
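        # Since conv4 is 1 x 1 spatially, each flattened stream is simply a
        # vector of length final_layer_size // 2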
        # We define weights for our advantage and value layers. We will train these
        # layers so the matmul matches the expected value and advantage from play
        self.Advantage = Dense(num_actions, name="advantage_final")(self.stream_AC)
        self.Value = Dense(1, name="value_final")(self.stream_VC)
        # To get the Q output, we need to add the value to the advantage.
        # The advantage is centered first, so each action is evaluated
        # relative to the average advantage of that state:
        # Q(state, action) = Value(state) + (Advantage(state, action) - mean advantage)
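        # For example (illustrative numbers): with Value(state) = 1.0 and
        # advantages [2.0, 0.0, -2.0], the mean advantage is 0.0, so the
        # Q values come out as [3.0, 1.0, -1.0]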
        self.model = Lambda(
            lambda val_adv: val_adv[0] + (val_adv[1] - K.mean(val_adv[1], axis=1, keepdims=True)),
            name="final_out")([self.Value, self.Advantage])
        self.model = Model(self.inputs, self.model)
        # Pass the learning rate to the optimizer at construction; overwriting
        # model.optimizer.lr after compiling is unreliable because it replaces
        # the optimizer's learning-rate variable with a plain float
        self.model.compile(optimizer=Adam(lr=0.0001), loss="mse")
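
# A minimal usage sketch. The 84 x 84 x 3 shape matches the comment above;
# final_layer_size=512 and num_actions=4 are illustrative choices, not values
# fixed by the network itself.
if __name__ == "__main__":
    import numpy as np

    q_net = Qnetwork(final_layer_size=512, input_shape=(84, 84, 3), num_actions=4)
    frame = np.zeros((1, 84, 84, 3))        # a batch of one blank game frame
    q_values = q_net.model.predict(frame)   # shape (1, 4): one Q value per action
    print(q_values)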