@hadifar
Created November 20, 2018 15:06
# At the beginning of training we mostly take random actions (exploration);
# as training progresses, we increasingly act on our neural-network model (exploitation)
if random.random() < epsilon:
    action = env.action_space.sample()
else:
    q_values = model.predict(tf.constant(np.expand_dims(state, axis=0), dtype=tf.float32)).numpy()[0]
    action = int(np.argmax(q_values))  # greedy action: highest predicted Q-value
# Take the chosen action to generate the next transition
next_state, reward, done, info = env.step(action)
# Override the environment reward with a penalty when the episode ends
reward = -10. if done else reward
# replay_buffer stores all training transitions
replay_buffer.append((state, action, reward, next_state, 1 if done else 0))
state = next_state
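The snippet above only collects transitions; a typical companion step (not shown in the gist) decays epsilon over time and samples a random minibatch from the replay buffer so the network is trained on decorrelated experience. A minimal sketch, assuming a `deque`-backed `replay_buffer` like the one appended to above; the capacity, decay rate, and `sample_minibatch` helper are illustrative choices, not part of the original code:

```python
import random
from collections import deque

# Hypothetical capacity; the gist does not show how replay_buffer is created.
replay_buffer = deque(maxlen=10000)

# Fill the buffer with dummy (state, action, reward, next_state, done) tuples
# standing in for the transitions collected by the loop above.
for t in range(100):
    replay_buffer.append((t, t % 3, -1.0, t + 1, 0))

def sample_minibatch(buffer, batch_size):
    """Uniformly sample transitions and unzip them into per-field tuples."""
    batch = random.sample(buffer, batch_size)
    states, actions, rewards, next_states, dones = zip(*batch)
    return states, actions, rewards, next_states, dones

states, actions, rewards, next_states, dones = sample_minibatch(replay_buffer, 32)

# Hypothetical epsilon-decay schedule: shrink toward a floor after each step.
epsilon = 1.0
epsilon = max(0.05, epsilon * 0.995)
```

Uniform sampling is the plain DQN choice; schemes such as prioritized replay reweight this step but keep the same buffer interface.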