Q-network used for reinforcement learning
from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input, Lambda
from keras.optimizers import Adam
import keras.backend as K


class Qnetwork():
    def __init__(self, final_layer_size, input_shape, num_actions):
        # The input image of the game is 84 x 84 x 3 (RGB),
        # so input_shape should be the tuple (84, 84, 3)
        self.inputs = Input(shape=input_shape, name="main_input")

        # Four convolutional layers are applied to the image input.
        # A convolution slides a filter over patches of the input and
        # matrix-multiplies each patch by the filter to produce a feature map.
        self.model = Conv2D(
            filters=32,
            kernel_size=[8, 8],
            strides=[4, 4],
            activation="relu",
            padding="valid",
            name="conv1")(self.inputs)
        self.model = Conv2D(
            filters=64,
            kernel_size=[4, 4],
            strides=[2, 2],
            activation="relu",
            padding="valid",
            name="conv2")(self.model)
        self.model = Conv2D(
            filters=64,
            kernel_size=[3, 3],
            strides=[1, 1],
            activation="relu",
            padding="valid",
            name="conv3")(self.model)
        self.model = Conv2D(
            filters=final_layer_size,
            kernel_size=[7, 7],
            strides=[1, 1],
            activation="relu",
            padding="valid",
            name="conv4")(self.model)

        # The final convolution layer is then split into an advantage and a
        # value stream. The value function measures how well off you are in a
        # given state. The advantage measures how much better off you are
        # after taking a particular action. Q is the value of a state after a
        # given action:
        # Advantage(state, action) = Q(state, action) - Value(state)
        self.stream_AC = Lambda(lambda layer: layer[:, :, :, :final_layer_size // 2], name="advantage")(self.model)
        self.stream_VC = Lambda(lambda layer: layer[:, :, :, final_layer_size // 2:], name="value")(self.model)

        # Flatten the advantage and value streams
        self.stream_AC = Flatten(name="advantage_flatten")(self.stream_AC)
        self.stream_VC = Flatten(name="value_flatten")(self.stream_VC)

        # Dense layers define the weights for the advantage and value streams.
        # These weights are trained so their outputs match the expected value
        # and advantage observed during play.
        self.Advantage = Dense(num_actions, name="advantage_final")(self.stream_AC)
        self.Value = Dense(1, name="value_final")(self.stream_VC)

        # To get the Q output, add the value to the advantage. The advantage
        # is centered by subtracting the mean advantage of the state, so each
        # action is judged relative to the average action in that state:
        # Q(state, action) = Value(state) + (Advantage(state, action) - mean advantage)
        self.model = Lambda(
            lambda val_adv: val_adv[0] + (val_adv[1] - K.mean(val_adv[1], axis=1, keepdims=True)),
            name="final_out")([self.Value, self.Advantage])
        self.model = Model(self.inputs, self.model)
        self.model.compile(optimizer=Adam(lr=0.0001), loss="mse")
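A minimal usage sketch of the class above, assuming an 84 x 84 x 3 RGB game frame, 512 channels in the final convolution layer, and four possible actions (these values are illustrative, not part of the gist):

import numpy as np

# Hypothetical settings: the 512 final-layer channels are split evenly
# between the advantage and value streams; the game has 4 actions.
q_net = Qnetwork(final_layer_size=512, input_shape=(84, 84, 3), num_actions=4)
q_net.model.summary()

# Predicted Q-values for a single random frame: shape (1, 4),
# one Q-value per action.
frame = np.random.rand(1, 84, 84, 3)
q_values = q_net.model.predict(frame)
print(q_values)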