import random

import numpy as np
import tensorflow as tf

if len(replay_buffer) >= batch_size:
    # Sample a random minibatch of transitions from the replay buffer
    batch_state, batch_action, batch_reward, batch_next_state, batch_done = \
        zip(*random.sample(replay_buffer, batch_size))
    batch_state, batch_reward, batch_next_state, batch_done = \
        [np.array(a, dtype=np.float32) for a in [batch_state, batch_reward, batch_next_state, batch_done]]
    batch_action = np.array(batch_action, dtype=np.int32)
    # Q-values of the next states, used below to build the Bellman target
    q_value = model(tf.constant(batch_next_state, dtype=tf.float32))
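
The snippet stops right after computing the next-state Q-values. A minimal sketch of the rest of the DQN update, assuming a discount factor gamma and an optimizer are defined elsewhere (both names are assumptions, not part of the original snippet); the lines continue inside the if block above:

    # Bellman target: reward now, plus discounted best future Q-value (zeroed on terminal states)
    y = batch_reward + gamma * tf.reduce_max(q_value, axis=1) * (1 - batch_done)
    with tf.GradientTape() as tape:
        q_pred = model(tf.constant(batch_state, dtype=tf.float32))
        # Pick out the Q-value of the action actually taken in each transition
        q_taken = tf.reduce_sum(q_pred * tf.one_hot(batch_action, depth=q_pred.shape[-1]), axis=1)
        loss = tf.reduce_mean(tf.square(y - q_taken))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))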
# At the beginning of training, we mostly take random actions (exploration).
# As training progresses, we increasingly choose actions with our neural-network model (exploitation).
if random.random() < epsilon:
    action = env.action_space.sample()
else:
    action = model.predict(tf.constant(np.expand_dims(state, axis=0), dtype=tf.float32)).numpy()[0]
# Collect the next transition as training data
next_state, reward, done, info = env.step(action)
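
To finish the interaction step, the transition would typically be pushed into the replay buffer and the state advanced. A minimal sketch, assuming replay_buffer is a collections.deque with a fixed maxlen (an assumption, not shown in the original snippet):

# Store the transition for later minibatch sampling; encode done as 0/1 for the target computation
replay_buffer.append((state, action, reward, next_state, 1.0 if done else 0.0))
state = next_state
if done:
    state = env.reset()  # Start a new episode once the current one ends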
class QNetwork(tf.keras.Model):
    def __init__(self, n_action_space=3):  # MountainCar-v0 has 3 discrete actions
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(units=n_action_space)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)  # One Q-value per action
        return x

    def predict(self, inputs):
        # Greedy action: index of the largest Q-value (used by model.predict(...) above)
        q_values = self(inputs)
        return tf.argmax(q_values, axis=-1)
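
For reference, a short sketch of wiring the network to the environment's action count (env as created in the gym snippets below):

import gym
env = gym.make('MountainCar-v0')
model = QNetwork(n_action_space=env.action_space.n)  # env.action_space.n == 3 for MountainCar-v0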
import gym

env = gym.make('MountainCar-v0')  # Instantiate a game environment; the parameter is its name.
state = env.reset()               # Initialize the environment and get its initial state.
while True:
    env.render()  # Render the current frame.
    action = model.predict(state)  # Assume we have a trained NN model that can predict the action.
    # If you just want to experiment, uncomment the line below and comment out the line above!
    # action = env.action_space.sample()
    state, reward, done, info = env.step(action)  # Apply the action and observe the new state.
    if done:
        break
import gym

env = gym.make('MountainCar-v0')
env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample())  # Take a random action at every step
env.close()
[sudo] pip install gym
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))  # Regularize the convolutional features
model.add(Flatten())      # Flatten the feature maps into a vector
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))  # One probability per class
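
A minimal sketch of compiling and fitting this network, assuming x_train/y_train and x_test/y_test are preprocessed images and one-hot labels matching input_shape and num_classes (all four names are assumptions):

model.compile(loss='categorical_crossentropy',  # Matches the softmax output and one-hot labels
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=128, epochs=10,
          validation_data=(x_test, y_test))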
class Linear(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.layer = LinearLayer()

    def call(self, input):
        output = self.layer(input)
        return output

model = Linear()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
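
With eager execution enabled, one way to train this model is a GradientTape loop. A minimal sketch, assuming X and y are the training inputs and targets (both names are assumptions):

for _ in range(1000):
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.reduce_mean(tf.square(y_pred - y))  # Mean squared error
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))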
class LinearLayer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Variables are created lazily, once the input shape is known
        # (add_weight is the current name of the deprecated add_variable)
        self.w = self.add_weight(name='w', shape=[input_shape[-1], 1], initializer=tf.zeros_initializer())
        self.b = self.add_weight(name='b', shape=[1], initializer=tf.zeros_initializer())

    def call(self, X):
        y_pred = tf.matmul(X, self.w) + self.b
        return y_pred
class MyLayer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        # Initialization code (hyperparameters, sub-layers, ...)

    def build(self, input_shape):
        # Called once with the shape of the first input;
        # create the layer's variables here (e.g. with self.add_weight)
        pass

    def call(self, input):
        # The layer's forward computation
        pass

    def compute_output_shape(self, input_shape):
        # Shape inference for the layer's output
        pass
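
As a concrete instance of the skeleton above, a sketch of a bias-only layer (the layer itself is an illustration, not from the original gists):

class BiasLayer(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # One learnable bias per input feature
        self.b = self.add_weight(name='b', shape=[input_shape[-1]], initializer=tf.zeros_initializer())

    def call(self, input):
        return input + self.b

    def compute_output_shape(self, input_shape):
        return input_shape  # Adding a bias does not change the shape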