Moustafa Alzantot malzantot

malzantot / mountaincar_qlearning.py

Last active October 20, 2022 04:00

Solution of MountainCar OpenAI Gym problem using Q-Learning.

	"""
	Q-Learning example using OpenAI gym MountainCar environment

	Author: Moustafa Alzantot ([email protected])

	"""
	import numpy as np

	import gym
	from gym import wrappers

malzantot / frozenlake8x8_policyiteration.py

Created July 9, 2017 02:19

	"""
	Solving FrozenLake8x8 environment using Policy iteration.
	Author : Moustafa Alzantot ([email protected])
	"""
	import numpy as np
	import gym
	from gym import wrappers


	def run_episode(env, policy, gamma = 1.0, render = False):

malzantot / frozenlake8x8_valueiteration.py

Created July 9, 2017 01:49

Solution of FrozenLake8x8 environment using Value Iteration.

	"""
	Solving FrozenLake8x8 environment using Value Iteration.


	Author : Moustafa Alzantot ([email protected])
	"""
	import numpy as np
	import gym
	from gym import wrappers

malzantot / mountaincar_qlearning.py

Created June 25, 2017 12:16

	"""
	Q-Learning example using OpenAI gym MountainCar environment
	Author: Moustafa Alzantot ([email protected])
	"""
	import numpy as np

	import gym
	from gym import wrappers

	n_states = 50

malzantot / frozenlake_genetic_algorithm.py

Created June 7, 2017 23:06

Solution of the FrozenLake problem using Genetic Algorithm

	import numpy as np
	import random
	import time
	import gym
	from gym import wrappers

	def run_episode(env, policy, episode_len=100):
	total_reward = 0
	obs = env.reset()
	for t in range(episode_len):

malzantot / frozenlake_randomsearch.py

Created June 7, 2017 20:52

	import numpy as np
	import time

	import gym

	def run_episode(env, policy, episode_len=100, render=False):
	total_reward = 0
	obs = env.reset()
	for t in range(episode_len):
	if render:

malzantot / cartpole_policysearch.py

Last active October 18, 2021 21:10

	import gym
	import numpy as np

	def gen_random_policy(n_features=4):
	    """Draw a random linear policy as a ``(weights, bias)`` pair.

	    Args:
	        n_features: Length of the weight vector. Defaults to 4, the size
	            that was previously hard-coded (presumably the length of the
	            observation vector this policy is dotted with -- confirm
	            against the environment in use).

	    Returns:
	        A tuple ``(weights, bias)`` where ``weights`` is a 1-D NumPy array
	        of ``n_features`` samples and ``bias`` is a single float, all drawn
	        with ``np.random.uniform(-1, 1)``.
	    """
	    # Draw the weights first, then the bias, so the sequence of values
	    # consumed from NumPy's global RNG is unchanged for the default size.
	    weights = np.random.uniform(-1, 1, size=n_features)
	    bias = np.random.uniform(-1, 1)
	    return (weights, bias)

	def policy_to_action(env, policy, obs):
	if np.dot(policy[0], obs) + policy[1] > 0:
	return 1
	else:

malzantot / openai_gym101.py

Last active April 26, 2017 23:23

openai_gym101

	import gym

	env = gym.make('CartPole-v0')

	# Restart the environment to start a new episode
	obs = env.reset()

	for step_idx in range(500):
	env.render()
	obs, reward, done, _ = env.step(env.action_space.sample())