Q-Learning
# Import required libraries
import gym
import matplotlib.pyplot as plt
import random
import numpy as np
from IPython.display import clear_output
# Create an instance of the Taxi-v3 environment
env = gym.make("Taxi-v3").env
# Create the Q-table, one row per state and one column per action
q_table = np.zeros([env.observation_space.n, env.action_space.n])

# Define hyperparameters
alpha = 0.1    # learning rate
gamma = 0.6    # discount factor
epsilon = 0.4  # exploration rate
# Q-learning uses the maximum Q' over all possible actions for the next state
all_rewards = []
episodes = 100000
frames = []
for i in range(1, episodes):
    state = env.reset()
    epochs, total_reward = 0, 0
    done = False
    while not done:
        # Behavioural policy that both explores and exploits
        if random.uniform(0, 1) < epsilon:
            # Explore: take a random action from the action space
            action = env.action_space.sample()
        else:
            # Exploit: take the action with the highest learned Q-value
            action = np.argmax(q_table[state])

        next_state, reward, done, info = env.step(action)
        old_value = q_table[state, action]
        # Target policy: uses the maximum Q' over all possible actions for the next state
        next_max = np.max(q_table[next_state])
        # Q-learning maximizes the state-action value function Q
        # over all possible actions for the next state:
        # Q(S, A) = Q(S, A) + α * (R + γ * max Q(S', A') - Q(S, A))
        new_value = old_value + alpha * (reward + gamma * next_max - old_value)
        q_table[state, action] = new_value
        state = next_state
        # Accumulate the total reward for this episode
        total_reward += reward

        frames.append(
            {
                'frame': env.render(mode='ansi'),
                'state': state,
                'action': action,
                'reward': reward,
                'episode': i
            }
        )
        epochs += 1
    # Record the total reward for this episode
    all_rewards.append(total_reward)
    if i % 100 == 0:
        clear_output(wait=True)
        print("Episode: ", i, "Reward: ", total_reward, "Epochs: ", epochs)
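
# --- Added sketch (not part of the original gist): replay the recorded frames ---
# frames is appended to above but never used. Replaying the last few recorded
# frames as a text animation shows the agent's behaviour late in training; the
# slice size and the delay between frames are arbitrary choices.
from time import sleep

for frame in frames[-50:]:
    clear_output(wait=True)
    text = frame['frame']
    # Depending on the gym version, render(mode='ansi') returns a str or a StringIO
    print(text if isinstance(text, str) else text.getvalue())
    print("Episode:", frame['episode'], "State:", frame['state'],
          "Action:", frame['action'], "Reward:", frame['reward'])
    sleep(0.1)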