Gist by @NMZivkovic, created June 23, 2019.
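The training loop below relies on `environment`, `q_table`, `alpha`, `gamma`, and `epsilon` being defined beforehand; the gist itself does not show that setup. A minimal setup sketch, assuming OpenAI Gym's Taxi-v2 environment and illustrative hyperparameter values (none of these specifics appear in the gist):

import random

import gym
import numpy as np
from IPython.display import clear_output

# Assumed setup (not part of the gist): a discrete Gym environment and a
# Q-table with one row per state and one column per action.
environment = gym.make("Taxi-v2")
q_table = np.zeros([environment.observation_space.n, environment.action_space.n])

# Illustrative hyperparameter values; the originals are defined elsewhere.
alpha = 0.1    # learning rate
gamma = 0.6    # discount factor
epsilon = 0.1  # exploration rate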
num_of_episodes = 100000

for episode in range(0, num_of_episodes):
    # Reset the environment
    state = environment.reset()

    # Initialize variables
    reward = 0
    terminated = False

    while not terminated:
        # Take the learned path or explore new actions based on the epsilon
        if random.uniform(0, 1) < epsilon:
            action = environment.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # Take the action
        next_state, reward, terminated, info = environment.step(action)

        # Recalculate using the Q-learning update rule:
        # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
        q_value = q_table[state, action]
        max_value = np.max(q_table[next_state])
        new_q_value = (1 - alpha) * q_value + alpha * (reward + gamma * max_value)

        # Update Q-table
        q_table[state, action] = new_q_value
        state = next_state

    if (episode + 1) % 100 == 0:
        clear_output(wait=True)
        print("Episode: {}".format(episode + 1))
        environment.render()

print("**********************************")
print("Training is done!\n")
print("**********************************")