from random import choice

'''
Some quick code to try out applying Q-learning to the game of Nim in Python.
'''
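# The training loop below uses the standard tabular Q-learning update,
#   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a')),
# where a state s is the number of sticks taken so far and an action a is
# taking 1, 2, or 3 sticks. Whoever takes the last stick loses.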
def get_best_action(q, s):
    '''
    Given the Q table and a state,
    pick the best action (ties broken at random).
    '''
    poss_actions = q[s]
    best_value = max(poss_actions)
    best_actions = [i for i, j in enumerate(poss_actions) if j == best_value]
    return choice(best_actions)
def get_worst_action(q, s):
    '''
    Given the Q table and a state,
    pick the worst action (ties broken at random).
    Unused in the training loop below.
    '''
    poss_actions = q[s]
    worst_value = min(poss_actions)
    worst_actions = [i for i, j in enumerate(poss_actions) if j == worst_value]
    return choice(worst_actions)
def get_random_action(q, s):
    '''
    Given the Q table and a state,
    pick a uniformly random action.
    '''
    return choice(range(len(q[s])))
def print_q(q, num_sticks=22):
    for x in range(len(q)):
        print("State:", x, "taken,", num_sticks - x, "left -", q[x])
def nim_fun(num_sticks=22, alpha=1.0, gamma=0.9):
    # Initialize. Extra rows account for the stick count going negative,
    # since either player can overshoot zero by up to 3 sticks.
    num_states = num_sticks + 6
    actions = (1, 2, 3)
    Q = [[0, 0, 0] for x in range(num_states)]
    iterations = 100000
    for i in range(iterations):
        # Reset game. States index Q by the number of sticks taken so far.
        curr_sticks = num_sticks
        old_state = 0
        while 0 < curr_sticks:
            # Apply user action.
            user_action_index = get_best_action(Q, num_sticks - curr_sticks)
            curr_sticks -= actions[user_action_index]
            reward = 0
            if curr_sticks < 1:
                reward = -1000  # User took the last stick and lost.
            else:
                # Apply computer action.
                comp_action_index = get_random_action(Q, num_sticks - curr_sticks)
                curr_sticks -= actions[comp_action_index]
                if curr_sticks < 1:
                    reward = 1000  # Computer took the last stick and lost.
            new_state = num_sticks - curr_sticks
            new_action_index = get_best_action(Q, new_state)
            # Q-learning update on the policy table. Terminal rows are never
            # written, so the bootstrap term is zero at the end of a game.
            Q[old_state][user_action_index] = ((1 - alpha) * Q[old_state][user_action_index] +
                                               alpha * (reward + gamma * Q[new_state][new_action_index]))
            old_state = new_state
    print_q(Q, num_sticks)
    return Q

Q = nim_fun()
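# A quick sanity check (an added sketch, not part of the original gist):
# play the greedy policy from the learned table against a uniformly random
# opponent. play_greedy_vs_random is a hypothetical helper; it assumes
# nim_fun() returns the Q table as above, and that action index i means
# taking i + 1 sticks.
def play_greedy_vs_random(q, num_sticks=22, games=1000):
    wins = 0
    for _ in range(games):
        curr_sticks = num_sticks
        while True:
            # Greedy player moves first, indexing by sticks taken so far.
            curr_sticks -= 1 + get_best_action(q, num_sticks - curr_sticks)
            if curr_sticks < 1:
                break  # Greedy player took the last stick and lost.
            curr_sticks -= choice((1, 2, 3))
            if curr_sticks < 1:
                wins += 1  # Random opponent took the last stick and lost.
                break
    print("Greedy policy won", wins, "of", games, "games")

play_greedy_vs_random(Q)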