athergeek · November 23, 2018 00:52
diff --git a/qlearning.py b/qlearning.py
 import numpy as np
 import pandas as pd
 import random as rand

 # Load the grid world; the ten CSV columns are labelled 1..10.
 world_df = pd.read_csv('world01.csv', names=list(range(1, 11)))
 # Work with the grid as a 2-D NumPy array from here on.
 world = world_df.values
 print(world)

 # One state per grid cell (rows * columns), four moves per state.
 number_of_states = len(world) * len(world[0])
 number_of_actions = 4

 # The Q-table starts out filled with random values.
 qtable = np.random.rand(number_of_states, number_of_actions)
 print("Qtable at the beginning")
 print(qtable)

 # Start cell of the robot: column index 4, row index 9.
 start_pos_ix, start_pos_iy = 4, 9

 # Cell codes used in the world grid:
 #   0 - blank space
 #   1 - obstacle
 #   2 - starting location of the robot
 #   3 - goal location
 #   5 - quicksand

 # Objective: learn to navigate from the starting location to the goal
 # with the highest total reward.
 # Rewards:
 #     -1 - moving onto a blank space, or attempting to move into a wall
 #   -100 - moving onto a quicksand space
 #      1 - moving onto the goal space

 # Hyper-parameters used while learning the Q-table.
 max_iterations = 50000           # total number of training steps
 learning_rate = 0.2              # weight given to each new estimate
 gamma = 0.9                      # discount applied to the next state's value
 random_action_decay_rate = 0.99  # per-step decay of the random-action rate

 def prepare_qtable():
     """Train the global ``qtable`` by running Q-learning steps on ``world``.

     The agent starts at ``(start_pos_iy, start_pos_ix)`` and takes
     ``max_iterations`` steps in total. Each step picks an action with
     ``act``, applies it to the grid, and feeds the resulting transition to
     ``update_qtable``. Cell codes follow the file's legend: 1 is an
     obstacle, 5 is quicksand and 3 is the goal.

     Rewards:
         -1    for a normal move, or for bumping into a wall/obstacle
               (the agent stays where it was);
         -100  for stepping into quicksand (agent is sent back to the start);
         1     for reaching the goal (agent is sent back to the start).

     Returns nothing; prints how often the goal and the quicksand were hit.
     """
     n_rows, n_cols = world.shape
     # (row, col) offset for each action id: right, up, left, down.
     moves = {0: (0, 1), 1: (-1, 0), 2: (0, -1), 3: (1, 0)}

     def state_index(row, col):
         # Row-major cell index; on a 10-column grid this equals the
         # "row digit followed by column digit" id (e.g. (9, 4) -> 94).
         return row * n_cols + col

     random_action_rate = 0.97
     start_pos = (start_pos_iy, start_pos_ix)
     start_state = state_index(*start_pos)
     s = start_state
     pos_y, pos_x = start_pos
     goal_reached = 0
     absorbed = 0

     for _ in range(max_iterations):
         action = act(random_action_rate, s)
         d_row, d_col = moves.get(int(action), (0, 0))
         row, col = pos_y + d_row, pos_x + d_col

         in_bounds = 0 <= row < n_rows and 0 <= col < n_cols
         if not in_bounds or world[row][col] == 1:
             # Leaving the grid or hitting an obstacle: stay put, pay -1.
             reward = -1
             s_prime = s
             next_pos, next_s = (pos_y, pos_x), s
         else:
             s_prime = state_index(row, col)
             cell = world[row][col]
             if cell == 5:
                 # Quicksand ends the episode; restart from the start cell.
                 reward = -100
                 absorbed += 1
                 next_pos, next_s = start_pos, start_state
             elif cell == 3:
                 # Goal ends the episode with a positive reward.
                 reward = 1
                 goal_reached += 1
                 next_pos, next_s = start_pos, start_state
             else:
                 reward = -1
                 next_pos, next_s = (row, col), s_prime

         # NOTE(review): update_qtable is also given s_prime for the
         # quicksand/goal cells, whose own rows are never trained here --
         # confirm whether those transitions should bootstrap at all.
         update_qtable(s, action, s_prime, reward)

         # Keep the state id in step with the position, including after a
         # reset to the start cell.
         s = next_s
         pos_y, pos_x = next_pos
         random_action_rate *= random_action_decay_rate

     print("Number of times agent reached the goal during the training: {}".format(goal_reached))
     print("Number of times agent was absorbed during the training: {}".format(absorbed))

 def update_qtable(s, a, s_prime, r):
     """Blend a new estimate for ``(s, a)`` into the global ``qtable``.

     The new estimate is the reward ``r`` plus ``gamma`` times the largest
     Q-value stored for ``s_prime``; it is mixed with the old entry using
     ``learning_rate`` as the weight of the new estimate.
     """
     best_next_value = qtable[s_prime].max()
     target = r + gamma * best_next_value
     qtable[s, a] = (1.0 - learning_rate) * qtable[s, a] + learning_rate * target


 def act(rar, state):
     """Choose an action for ``state``.

     With probability ``rar`` a uniformly random action id in
     ``0 .. number_of_actions - 1`` is returned; otherwise the action with
     the highest value in ``qtable[state]`` is returned.
     """
     if rand.random() < rar:
         # Explore: take a random step.
         return rand.randint(0, number_of_actions - 1)
     # Exploit: take the step the Q-table currently rates best.
     return np.argmax(qtable[state])



 if __name__ == "__main__":
     # Train first, then show the learned table and write it to disk.
     prepare_qtable()
     print("Qtable after {} iterations".format(max_iterations))
     print(qtable)
     print("Saving qtabe as qtable.csv")
     qtable_frame = pd.DataFrame(data=qtable)
     qtable_frame.to_csv('qtable.csv')
	import numpy as np
	import pandas as pd
	import random as rand

	# Load the grid world; the ten CSV columns are labelled 1..10.
	world_df = pd.read_csv('world01.csv', names=list(range(1, 11)))
	# Work with the grid as a 2-D NumPy array from here on.
	world = world_df.values
	print(world)

	# One state per grid cell (rows * columns), four moves per state.
	number_of_states = len(world) * len(world[0])
	number_of_actions = 4

	# The Q-table starts out filled with random values.
	qtable = np.random.rand(number_of_states, number_of_actions)
	print("Qtable at the beginning")
	print(qtable)

	# Start cell of the robot: column index 4, row index 9.
	start_pos_ix, start_pos_iy = 4, 9

	# Cell codes used in the world grid:
	#   0 - blank space
	#   1 - obstacle
	#   2 - starting location of the robot
	#   3 - goal location
	#   5 - quicksand

	# Objective: learn to navigate from the starting location to the goal
	# with the highest total reward.
	# Rewards:
	#     -1 - moving onto a blank space, or attempting to move into a wall
	#   -100 - moving onto a quicksand space
	#      1 - moving onto the goal space

	# Hyper-parameters used while learning the Q-table.
	max_iterations = 50000           # total number of training steps
	learning_rate = 0.2              # weight given to each new estimate
	gamma = 0.9                      # discount applied to the next state's value
	random_action_decay_rate = 0.99  # per-step decay of the random-action rate

	def prepare_qtable():
	    """Train the global ``qtable`` by running Q-learning steps on ``world``.

	    The agent starts at ``(start_pos_iy, start_pos_ix)`` and takes
	    ``max_iterations`` steps in total. Each step picks an action with
	    ``act``, applies it to the grid, and feeds the resulting transition to
	    ``update_qtable``. Cell codes follow the file's legend: 1 is an
	    obstacle, 5 is quicksand and 3 is the goal.

	    Rewards:
	        -1    for a normal move, or for bumping into a wall/obstacle
	              (the agent stays where it was);
	        -100  for stepping into quicksand (agent is sent back to the start);
	        1     for reaching the goal (agent is sent back to the start).

	    Returns nothing; prints how often the goal and the quicksand were hit.
	    """
	    n_rows, n_cols = world.shape
	    # (row, col) offset for each action id: right, up, left, down.
	    moves = {0: (0, 1), 1: (-1, 0), 2: (0, -1), 3: (1, 0)}

	    def state_index(row, col):
	        # Row-major cell index; on a 10-column grid this equals the
	        # "row digit followed by column digit" id (e.g. (9, 4) -> 94).
	        return row * n_cols + col

	    random_action_rate = 0.97
	    start_pos = (start_pos_iy, start_pos_ix)
	    start_state = state_index(*start_pos)
	    s = start_state
	    pos_y, pos_x = start_pos
	    goal_reached = 0
	    absorbed = 0

	    for _ in range(max_iterations):
	        action = act(random_action_rate, s)
	        d_row, d_col = moves.get(int(action), (0, 0))
	        row, col = pos_y + d_row, pos_x + d_col

	        in_bounds = 0 <= row < n_rows and 0 <= col < n_cols
	        if not in_bounds or world[row][col] == 1:
	            # Leaving the grid or hitting an obstacle: stay put, pay -1.
	            reward = -1
	            s_prime = s
	            next_pos, next_s = (pos_y, pos_x), s
	        else:
	            s_prime = state_index(row, col)
	            cell = world[row][col]
	            if cell == 5:
	                # Quicksand ends the episode; restart from the start cell.
	                reward = -100
	                absorbed += 1
	                next_pos, next_s = start_pos, start_state
	            elif cell == 3:
	                # Goal ends the episode with a positive reward.
	                reward = 1
	                goal_reached += 1
	                next_pos, next_s = start_pos, start_state
	            else:
	                reward = -1
	                next_pos, next_s = (row, col), s_prime

	        # NOTE(review): update_qtable is also given s_prime for the
	        # quicksand/goal cells, whose own rows are never trained here --
	        # confirm whether those transitions should bootstrap at all.
	        update_qtable(s, action, s_prime, reward)

	        # Keep the state id in step with the position, including after a
	        # reset to the start cell.
	        s = next_s
	        pos_y, pos_x = next_pos
	        random_action_rate *= random_action_decay_rate

	    print("Number of times agent reached the goal during the training: {}".format(goal_reached))
	    print("Number of times agent was absorbed during the training: {}".format(absorbed))

	def update_qtable(s, a, s_prime, r):
	    """Blend a new estimate for ``(s, a)`` into the global ``qtable``.

	    The new estimate is the reward ``r`` plus ``gamma`` times the largest
	    Q-value stored for ``s_prime``; it is mixed with the old entry using
	    ``learning_rate`` as the weight of the new estimate.
	    """
	    best_next_value = qtable[s_prime, np.argmax(qtable[s_prime])]
	    qtable[s, a] = (1.0 - learning_rate) * qtable[s, a] + learning_rate * \
	        (r + gamma * best_next_value)


	def act(rar, state):
	    """Choose an action for ``state``.

	    With probability ``rar`` a uniformly random action id in
	    ``0 .. number_of_actions - 1`` is returned; otherwise the action with
	    the highest value in ``qtable[state]`` is returned.
	    """
	    flip = rand.random()
	    if flip < rar:
	        # Take a random step
	        action = rand.randint(0, number_of_actions - 1)
	    else:
	        # Take a calculated step
	        action = np.argmax(qtable[state])

	    return action



	if __name__ == "__main__":
	    # Train first, then show the learned table and write it to disk.
	    prepare_qtable()
	    print("Qtable after {} iterations".format(max_iterations))
	    print(qtable)
	    print("Saving qtabe as qtable.csv")
	    pd.DataFrame(data=qtable).to_csv('qtable.csv')