NikolasTzimoulis · December 13, 2015 19:48
diff --git a/centipede.py b/centipede.py
 leavePayoffs = (  (0, 0),  (-1, 3),  (2, 2),  (1, 5),  (4, 4),  (3, 7),  (6, 6),  (5, 9),  (8, 8),  (7, 11),  (10, 10)  )
 #leavePayoffs = (  (0, 0),  (-1, 1),  (2, -2),  (-3, 3),  (4, -4),  (-5, 5),  (6, -6),  (-7, 7),  (8, -8),  (-9, 9),  (10, -10)  )
 backgroundPriors = [(0.99999, 1.0)]
 lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
 expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
 lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
 getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
 other = lambda player: (player+1)%2 # returns the other player 
 lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2) 
 maxRounds = 10


 for round in range(maxRounds):
    print 'new lambda intervals:', lambdas
    
    # backwards induction to update expected payoffs
    for dnode in reversed(range(len(leavePayoffs))): 
        if dnode == len(leavePayoffs) - 1: # if this is the last node
            expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
        else:
            # calculate threshold value of lambda that changes the decision
            u_a1 = leavePayoffs[dnode][getPlayer(dnode)] 
            u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
            u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
            u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
            try:
                thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
                # see which decision goes with the interval above the threshold
                # and which decision goes with the interval below the threshold 
                if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
                    u_above_threshold = leavePayoffs[dnode]
                    u_below_threshold = expectedPayoffs[dnode+1]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 ) 
                else:
                    u_above_threshold = expectedPayoffs[dnode+1]
                    u_below_threshold = leavePayoffs[dnode]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 ) 
            except ZeroDivisionError:
                thres = 0.0  
                if u_b2 < u_a2:
                    u_above_threshold = leavePayoffs[dnode]
                    u_below_threshold = expectedPayoffs[dnode+1]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
                else:
                    u_above_threshold = expectedPayoffs[dnode+1]              
                    u_below_threshold = leavePayoffs[dnode]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
            # calculate normalising constant so that 
            # the probability mass of lambda is 1.0
            norm_constant = 1.0 / ( lambdas[dnode][1] - lambdas[dnode][0] )
            # get the size of the intervals (lambda_min, thres) and (thres, lambda_max)
            interval_above = max(0.0, lambdas[dnode][1] - max(thres, lambdas[dnode][0]))
            interval_below = max(0.0, min(thres, lambdas[dnode][1]) - lambdas[dnode][0])
            # mix decisions according to intervals to get expected payoffs 
            node_payoffs = []
            for i in range(2):
                node_payoffs.append(norm_constant * (interval_above * u_above_threshold[i] + interval_below * u_below_threshold[i]))
            expectedPayoffs[dnode] = (node_payoffs[0], node_payoffs[1])
            
    print 'new expected payoffs:', expectedPayoffs
    print
                
    # forward induction to update lambdas     
    for dnode in range(len(leavePayoffs)):
        nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
        lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))
diff --git a/centipede_montecarlo.py b/centipede_montecarlo.py
 import random

 leavePayoffs = (  (0, 0),  (-1, 3),  (2, 2),  (1, 5),  (4, 4),  (3, 7),  (6, 6),  (5, 9),  (8, 8),  (7, 11),  (10, 10)  )
 #leavePayoffs = (  (0, 0),  (-1, 1),  (2, -2),  (-3, 3),  (4, -4),  (-5, 5),  (6, -6),  (-7, 7),  (8, -8),  (-9, 9),  (10, -10)  )
 backgroundPriors = [(0.99999, 1.0)]
 lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
 expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
 lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
 getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
 other = lambda player: (player+1)%2 # returns the other player 
 lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2) 
 maxRounds = 1000

 resultCounter = {}
 for round in range(maxRounds):
    print 'new lambda intervals:', lambdas
    
    # backwards induction to update expected payoffs
    for dnode in reversed(range(len(leavePayoffs))): 
        if dnode == len(leavePayoffs) - 1: # if this is the last node
            expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
        else:
            # calculate threshold value of lambda that changes the decision
            u_a1 = leavePayoffs[dnode][getPlayer(dnode)] 
            u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
            u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
            u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
            try:
                thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
                # see which decision goes with the interval above the threshold
                # and which decision goes with the interval below the threshold 
                if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
                    u_above_threshold = leavePayoffs[dnode]
                    u_below_threshold = expectedPayoffs[dnode+1]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 ) 
                else:
                    u_above_threshold = expectedPayoffs[dnode+1]
                    u_below_threshold = leavePayoffs[dnode]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 ) 
            except ZeroDivisionError:
                thres = 0.0  
                if u_b2 < u_a2:
                    u_above_threshold = leavePayoffs[dnode]
                    u_below_threshold = expectedPayoffs[dnode+1]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
                else:
                    u_above_threshold = expectedPayoffs[dnode+1]              
                    u_below_threshold = leavePayoffs[dnode]
                    # apply stay constraint to lambda interval (to use for lambda update)
                    lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
            # sample the lambda distribution
            lambda_sample = random.uniform(lambdas[dnode][0], lambdas[dnode][1])
            # assign node expected utility based on sample
            if lambda_sample >= thres:
                expectedPayoffs[dnode] = u_above_threshold
            else:
                expectedPayoffs[dnode] = u_below_threshold       
    
    if expectedPayoffs[0] in resultCounter:
        resultCounter[expectedPayoffs[0]] += 1
    else:
        resultCounter[expectedPayoffs[0]] = 1
    print 'new expected payoffs:', expectedPayoffs
    print
                
    # forward induction to update lambdas     
    for dnode in range(len(leavePayoffs)):
        nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
        lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))
        
 print resultCounter
	leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) )
	#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) )
	backgroundPriors = [(0.99999, 1.0)]
	lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
	expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
	lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
	getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
	other = lambda player: (player+1)%2 # returns the other player
	lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2)
	maxRounds = 10


	for round in range(maxRounds):
	print 'new lambda intervals:', lambdas

	# backwards induction to update expected payoffs
	for dnode in reversed(range(len(leavePayoffs))):
	if dnode == len(leavePayoffs) - 1: # if this is the last node
	expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
	else:
	# calculate threshold value of lambda that changes the decision
	u_a1 = leavePayoffs[dnode][getPlayer(dnode)]
	u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
	u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
	u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
	try:
	thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
	# see which decision goes with the interval above the threshold
	# and which decision goes with the interval below the threshold
	if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
	u_above_threshold = leavePayoffs[dnode]
	u_below_threshold = expectedPayoffs[dnode+1]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 )
	else:
	u_above_threshold = expectedPayoffs[dnode+1]
	u_below_threshold = leavePayoffs[dnode]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 )
	except ZeroDivisionError:
	thres = 0.0
	if u_b2 < u_a2:
	u_above_threshold = leavePayoffs[dnode]
	u_below_threshold = expectedPayoffs[dnode+1]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
	else:
	u_above_threshold = expectedPayoffs[dnode+1]
	u_below_threshold = leavePayoffs[dnode]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
	# calculate normalising constant so that
	# the probability mass of lambda is 1.0
	norm_constant = 1.0 / ( lambdas[dnode][1] - lambdas[dnode][0] )
	# get the size of the intervals (lambda_min, thres) and (thres, lambda_max)
	interval_above = max(0.0, lambdas[dnode][1] - max(thres, lambdas[dnode][0]))
	interval_below = max(0.0, min(thres, lambdas[dnode][1]) - lambdas[dnode][0])
	# mix decisions according to intervals to get expected payoffs
	node_payoffs = []
	for i in range(2):
	node_payoffs.append(norm_constant * (interval_above * u_above_threshold[i] + interval_below * u_below_threshold[i]))
	expectedPayoffs[dnode] = (node_payoffs[0], node_payoffs[1])

	print 'new expected payoffs:', expectedPayoffs
	print

	# forward induction to update lambdas
	for dnode in range(len(leavePayoffs)):
	nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
	lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))
	import random

	leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) )
	#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) )
	backgroundPriors = [(0.99999, 1.0)]
	lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
	expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
	lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
	getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
	other = lambda player: (player+1)%2 # returns the other player
	lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2)
	maxRounds = 1000

	resultCounter = {}
	for round in range(maxRounds):
	print 'new lambda intervals:', lambdas

	# backwards induction to update expected payoffs
	for dnode in reversed(range(len(leavePayoffs))):
	if dnode == len(leavePayoffs) - 1: # if this is the last node
	expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
	else:
	# calculate threshold value of lambda that changes the decision
	u_a1 = leavePayoffs[dnode][getPlayer(dnode)]
	u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
	u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
	u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
	try:
	thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
	# see which decision goes with the interval above the threshold
	# and which decision goes with the interval below the threshold
	if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
	u_above_threshold = leavePayoffs[dnode]
	u_below_threshold = expectedPayoffs[dnode+1]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 )
	else:
	u_above_threshold = expectedPayoffs[dnode+1]
	u_below_threshold = leavePayoffs[dnode]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 )
	except ZeroDivisionError:
	thres = 0.0
	if u_b2 < u_a2:
	u_above_threshold = leavePayoffs[dnode]
	u_below_threshold = expectedPayoffs[dnode+1]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
	else:
	u_above_threshold = expectedPayoffs[dnode+1]
	u_below_threshold = leavePayoffs[dnode]
	# apply stay constraint to lambda interval (to use for lambda update)
	lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
	# sample the lambda distribution
	lambda_sample = random.uniform(lambdas[dnode][0], lambdas[dnode][1])
	# assign node expected utility based on sample
	if lambda_sample >= thres:
	expectedPayoffs[dnode] = u_above_threshold
	else:
	expectedPayoffs[dnode] = u_below_threshold

	if expectedPayoffs[0] in resultCounter:
	resultCounter[expectedPayoffs[0]] += 1
	else:
	resultCounter[expectedPayoffs[0]] = 1
	print 'new expected payoffs:', expectedPayoffs
	print

	# forward induction to update lambdas
	for dnode in range(len(leavePayoffs)):
	nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
	lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))

	print resultCounter