Last active
December 13, 2015 19:48
-
-
Save NikolasTzimoulis/4965057 to your computer and use it in GitHub Desktop.
Probabilistic forward-backward induction for solving centipede-style games.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) ) | |
#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) ) | |
backgroundPriors = [(0.99999, 1.0)] | |
lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs) | |
expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them | |
lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them | |
getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes | |
other = lambda player: (player+1)%2 # returns the other player | |
lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2) | |
maxRounds = 10 | |
for round in range(maxRounds): | |
print 'new lambda intervals:', lambdas | |
# backwards induction to update expected payoffs | |
for dnode in reversed(range(len(leavePayoffs))): | |
if dnode == len(leavePayoffs) - 1: # if this is the last node | |
expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice | |
else: | |
# calculate threshold value of lambda that changes the decision | |
u_a1 = leavePayoffs[dnode][getPlayer(dnode)] | |
u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))] | |
u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)] | |
u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))] | |
try: | |
thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2))) | |
# see which decision goes with the interval above the threshold | |
# and which decision goes with the interval below the threshold | |
if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2) | |
u_above_threshold = leavePayoffs[dnode] | |
u_below_threshold = expectedPayoffs[dnode+1] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 ) | |
else: | |
u_above_threshold = expectedPayoffs[dnode+1] | |
u_below_threshold = leavePayoffs[dnode] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 ) | |
except ZeroDivisionError: | |
thres = 0.0 | |
if u_b2 < u_a2: | |
u_above_threshold = leavePayoffs[dnode] | |
u_below_threshold = expectedPayoffs[dnode+1] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1])) | |
else: | |
u_above_threshold = expectedPayoffs[dnode+1] | |
u_below_threshold = leavePayoffs[dnode] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) ) | |
# calculate normalising constant so that | |
# the probability mass of lambda is 1.0 | |
norm_constant = 1.0 / ( lambdas[dnode][1] - lambdas[dnode][0] ) | |
# get the size of the intervals (lambda_min, thres) and (thres, lambda_max) | |
interval_above = max(0.0, lambdas[dnode][1] - max(thres, lambdas[dnode][0])) | |
interval_below = max(0.0, min(thres, lambdas[dnode][1]) - lambdas[dnode][0]) | |
# mix decisions according to intervals to get expected payoffs | |
node_payoffs = [] | |
for i in range(2): | |
node_payoffs.append(norm_constant * (interval_above * u_above_threshold[i] + interval_below * u_below_threshold[i])) | |
expectedPayoffs[dnode] = (node_payoffs[0], node_payoffs[1]) | |
print 'new expected payoffs:', expectedPayoffs | |
# forward induction to update lambdas | |
for dnode in range(len(leavePayoffs)): | |
nodePriors = backgroundPriors + lambdasGivenStay[0:dnode] | |
lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) ) | |
#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) ) | |
backgroundPriors = [(0.99999, 1.0)] | |
lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs) | |
expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them | |
lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them | |
getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes | |
other = lambda player: (player+1)%2 # returns the other player | |
lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2) | |
maxRounds = 1000 | |
resultCounter = {} | |
for round in range(maxRounds): | |
print 'new lambda intervals:', lambdas | |
# backwards induction to update expected payoffs | |
for dnode in reversed(range(len(leavePayoffs))): | |
if dnode == len(leavePayoffs) - 1: # if this is the last node | |
expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice | |
else: | |
# calculate threshold value of lambda that changes the decision | |
u_a1 = leavePayoffs[dnode][getPlayer(dnode)] | |
u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))] | |
u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)] | |
u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))] | |
try: | |
thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2))) | |
# see which decision goes with the interval above the threshold | |
# and which decision goes with the interval below the threshold | |
if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2) | |
u_above_threshold = leavePayoffs[dnode] | |
u_below_threshold = expectedPayoffs[dnode+1] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 ) | |
else: | |
u_above_threshold = expectedPayoffs[dnode+1] | |
u_below_threshold = leavePayoffs[dnode] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 ) | |
except ZeroDivisionError: | |
thres = 0.0 | |
if u_b2 < u_a2: | |
u_above_threshold = leavePayoffs[dnode] | |
u_below_threshold = expectedPayoffs[dnode+1] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1])) | |
else: | |
u_above_threshold = expectedPayoffs[dnode+1] | |
u_below_threshold = leavePayoffs[dnode] | |
# apply stay constraint to lambda interval (to use for lambda update) | |
lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) ) | |
# sample the lambda distribution | |
lambda_sample = random.uniform(lambdas[dnode][0], lambdas[dnode][1]) | |
# assign node expected utility based on sample | |
if lambda_sample >= thres: | |
expectedPayoffs[dnode] = u_above_threshold | |
else: | |
expectedPayoffs[dnode] = u_below_threshold | |
if expectedPayoffs[0] in resultCounter: | |
resultCounter[expectedPayoffs[0]] += 1 | |
else: | |
resultCounter[expectedPayoffs[0]] = 1 | |
print 'new expected payoffs:', expectedPayoffs | |
# forward induction to update lambdas | |
for dnode in range(len(leavePayoffs)): | |
nodePriors = backgroundPriors + lambdasGivenStay[0:dnode] | |
lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors)) | |
print resultCounter |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment