pythonlessons · January 14, 2020 14:44
diff --git a/OurModel.py b/OurModel.py
 def OurModel(input_shape, action_space, dueling):
     """Build and compile the fully connected Q-network.

     The network is three ReLU hidden layers (512, 256 and 64 units)
     followed by either a plain linear Q-value head or a dueling head
     that combines a state-value stream with an advantage stream.

     Args:
         input_shape: Shape of a single state, passed to ``Input``.
         action_space: Number of actions, i.e. number of Q-value outputs.
         dueling: If true, build the dueling head; otherwise a single
             linear ``Dense`` output layer.

     Returns:
         The compiled Keras ``Model``. Its summary is printed as a side
         effect.
     """
     X_input = Input(input_shape)

     # Shared trunk: 512 -> 256 -> 64 ReLU units.
     X = Dense(512, input_shape=input_shape, activation="relu", kernel_initializer='he_uniform')(X_input)
     X = Dense(256, activation="relu", kernel_initializer='he_uniform')(X)
     X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

     if dueling:
         # State-value stream: one scalar V(s) per sample, kept as a
         # trailing axis of size 1 so it can be added to every action.
         state_value = Dense(1, kernel_initializer='he_uniform')(X)
         state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1), output_shape=(action_space,))(state_value)

         # Advantage stream: one value A(s, a) per action, centred per
         # sample. The mean must run over the action axis only (axis=1);
         # averaging over every axis would fold the other samples of the
         # batch into each sample's Q-values.
         action_advantage = Dense(action_space, kernel_initializer='he_uniform')(X)
         action_advantage = Lambda(lambda a: a - K.mean(a, axis=1, keepdims=True), output_shape=(action_space,))(action_advantage)

         # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
         X = Add()([state_value, action_advantage])
     else:
         # Plain head: one linear output per action.
         X = Dense(action_space, activation="linear", kernel_initializer='he_uniform')(X)

     model = Model(inputs=X_input, outputs=X, name='CartPole Dueling DDQN model')
     model.compile(loss="mean_squared_error", optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01), metrics=["accuracy"])

     model.summary()
     return model
	def OurModel(input_shape, action_space, dueling):
		"""Build and compile the fully connected Q-network.

		The network is three ReLU hidden layers (512, 256 and 64 units)
		followed by either a plain linear Q-value head or a dueling head
		that combines a state-value stream with an advantage stream.

		Args:
			input_shape: Shape of a single state, passed to ``Input``.
			action_space: Number of actions, i.e. number of Q-value outputs.
			dueling: If true, build the dueling head; otherwise a single
				linear ``Dense`` output layer.

		Returns:
			The compiled Keras ``Model``. Its summary is printed as a side
			effect.
		"""
		X_input = Input(input_shape)

		# Shared trunk: 512 -> 256 -> 64 ReLU units.
		X = Dense(512, input_shape=input_shape, activation="relu", kernel_initializer='he_uniform')(X_input)
		X = Dense(256, activation="relu", kernel_initializer='he_uniform')(X)
		X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

		if dueling:
			# State-value stream: one scalar V(s) per sample, kept as a
			# trailing axis of size 1 so it can be added to every action.
			state_value = Dense(1, kernel_initializer='he_uniform')(X)
			state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1), output_shape=(action_space,))(state_value)

			# Advantage stream: one value A(s, a) per action, centred per
			# sample. The mean must run over the action axis only (axis=1);
			# averaging over every axis would fold the other samples of the
			# batch into each sample's Q-values.
			action_advantage = Dense(action_space, kernel_initializer='he_uniform')(X)
			action_advantage = Lambda(lambda a: a - K.mean(a, axis=1, keepdims=True), output_shape=(action_space,))(action_advantage)

			# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
			X = Add()([state_value, action_advantage])
		else:
			# Plain head: one linear output per action.
			X = Dense(action_space, activation="linear", kernel_initializer='he_uniform')(X)

		model = Model(inputs=X_input, outputs=X, name='CartPole Dueling DDQN model')
		model.compile(loss="mean_squared_error", optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01), metrics=["accuracy"])

		model.summary()
		return model
No results found