Skip to content

Instantly share code, notes, and snippets.

@lisaah
Created February 20, 2014 00:20
Show Gist options
  • Save lisaah/9104444 to your computer and use it in GitHub Desktop.
Save lisaah/9104444 to your computer and use it in GitHub Desktop.
from random import choice
'''
Some quick code to try out applying Q-learning to the game of Nim in Python.
'''
def get_best_action(q, s):
    '''
    Return the index of a highest-valued action for state s.

    q is indexed by state and each row holds one value per action.
    When several actions share the highest value, one of them is
    picked at random.
    '''
    action_values = q[s]
    top_value = max(action_values)
    candidates = []
    for index, value in enumerate(action_values):
        if value == top_value:
            candidates.append(index)
    return choice(candidates)
def get_worst_action(q, s):
    '''
    Given the q table and state,
    pick the worst action.

    Returns the index of a lowest-valued entry in q[s]; when several
    actions share the lowest value, one of them is picked at random.
    '''
    poss_actions = q[s]
    worst_value = min(poss_actions)
    worst_actions = [i for i, j in enumerate(poss_actions) if j == worst_value]
    return choice(worst_actions)
def get_random_action(q, s):
    '''
    Return a uniformly random action index for state s.

    Only the number of entries in q[s] matters; the stored values
    are ignored.
    '''
    action_count = len(q[s])
    return choice(range(action_count))
def print_q(q, num_sticks=22):
    '''
    Print one line per state of the q table.

    Each line shows the state index, the number of sticks left in that
    state (num_sticks minus the index) and the row of action values.

    q          -- sequence of rows, indexed by state.
    num_sticks -- starting pile size used for the "left" column.
    '''
    for state, values in enumerate(q):
        # A single pre-formatted string prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print("State: %s %s left - %s" % (state, num_sticks - state, values))
def nim_fun(num_sticks=22, alpha=1.0, gamma=.9, iterations=100000):
    '''
    Fill in a Q table for a take-1/2/3 game of Nim by repeated play.

    The learner ("user") moves first each round and picks greedily from
    the table (ties broken at random); the opponent ("computer") picks
    at random. Whoever takes the last stick loses.

    A state is the number of sticks removed so far, so row 0 is the
    starting position.

    num_sticks -- sticks on the table at the start of every game.
    alpha      -- weight of the new estimate in each table update.
    gamma      -- discount applied to the value of the following state.
    iterations -- number of games to play.

    Prints the table with print_q and returns it.
    '''
    # Initialize.
    # A move can take more sticks than are left, so pad the table with
    # extra rows for the states past num_sticks.
    num_states = num_sticks + 6
    actions = (1, 2, 3)
    Q = [[0, 0, 0] for _ in range(num_states)]

    for _ in range(iterations):
        # Reset game.
        curr_sticks = num_sticks
        old_state = 0
        while curr_sticks > 0:
            # Apply user action. The row is looked up by state (sticks
            # removed), the same index the update below writes to.
            user_action_index = get_best_action(Q, old_state)
            curr_sticks -= actions[user_action_index]
            reward = 0
            if curr_sticks < 1:
                reward = -1000  # User lost.
            else:
                # Apply computer action.
                comp_action_index = get_random_action(
                    Q, num_sticks - curr_sticks)
                curr_sticks -= actions[comp_action_index]
                if curr_sticks < 1:
                    reward = 1000  # Computer lost.
            new_state = num_sticks - curr_sticks

            # Update policy table. max() over the next row is the value
            # of the greedy action in the state the user faces next.
            Q[old_state][user_action_index] = (
                (1 - alpha) * Q[old_state][user_action_index]
                + alpha * (reward + gamma * max(Q[new_state])))
            old_state = new_state

    print_q(Q)
    return Q
# Run the training with the default settings as soon as the file is executed.
nim_fun()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment