gandroz · March 14, 2021 02:24
diff --git a/playing_q_table.py b/playing_q_table.py
 # Begin a new episode; env.reset() supplies the starting state.
 state = env.reset()

 # Draw the starting frame and hold it briefly before stepping.
 env.render()
 time.sleep(0.5)

 done = False
 while not done:
     # Greedy policy: pick the index of the largest Q-value stored for
     # the current state.
     q_values = q_table[state]
     action = np.argmax(q_values)
     # Apply the action, then unpack what the environment hands back.
     outcome = env.step(action)
     state, reward, done, info = outcome
     # Replace the previous frame with the new one and report the reward.
     clear_output(wait=True)
     env.render()
     print(reward)
     # Pause so each frame stays visible.
     time.sleep(0.5)
	# Replay one episode, always taking the action with the highest Q-value.
	# Start a new episode; env.reset() returns the initial state.
	state = env.reset()

	# Draw the starting frame and pause so it can be seen.
	env.render()
	time.sleep(0.5)

	done = False
	while not done:
		# Choose the action with the max expected reward, i.e. max Q-value
		action = np.argmax(q_table[state])
		# Apply it; env.step returns the next state, reward, done flag and info
		state, reward, done, info = env.step(action)
		# Clear the previous output before drawing the new frame
		clear_output(wait=True)
		env.render()
		print(reward)
		# Pause between steps so each frame stays visible
		time.sleep(0.5)