Last active
September 13, 2017 23:55
-
-
Save sir-deenicus/623e57a88c3321edde2a00c0d1cd7053 to your computer and use it in GitHub Desktop.
Regret Minimization on Rock, Paper, Scissors (Games with gifts and no gifts)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LEARNED STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
R | 0 | |
P | 0.999 | |
S | 0 | |
T | 0 | |
LEARNED STRATEGY VS FIXED ADVERSARY | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
P | R | 0.999 | 0.4 | 0.399 | 0.399 | -0.399 | |
P | S | 0.999 | 0.1 | 0.1 | -0.1 | 0.1 | |
P | T | 0.999 | 0.1 | 0.1 | -0.1 | 0.1 | |
Player1: 0.2 | Player2 : -0.2 | |
~Strategy EV (10 rounds, $5/round) | |
[π°,100%] vs [π,40% ;π°, 40%; β, 10%; π ,10%] = 12.0 | |
EQUIL1 | |
Move | Move Probability | |
---- | ---------------- | |
R | 0.332 | |
P | 0.333 | |
S | 0.335 | |
T | 0 | |
EQUIL2 | |
Move | Move Probability | |
---- | ---------------- | |
R | 0.333 | |
P | 0.335 | |
S | 0.333 | |
T | 0 | |
EQUIL | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
R | P | 0.332 | 0.335 | 0.111 | -0.111 | 0.111 | |
R | S | 0.332 | 0.333 | 0.11 | 0.11 | -0.11 | |
P | R | 0.333 | 0.333 | 0.111 | 0.111 | -0.111 | |
P | S | 0.333 | 0.333 | 0.111 | -0.111 | 0.111 | |
S | R | 0.335 | 0.333 | 0.111 | -0.111 | 0.111 | |
S | P | 0.335 | 0.335 | 0.112 | 0.112 | -0.112 | |
Player1: 0.0 | Player2 : 0.0 | |
EQUIL1 VS ADV | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
R | P | 0.334 | 0.2 | 0.067 | -0.067 | 0.067 | |
R | S | 0.334 | 0.1 | 0.033 | 0.033 | -0.033 | |
R | T | 0.334 | 0.4 | 0.134 | 0.134 | -0.134 | |
P | R | 0.331 | 0.3 | 0.099 | 0.099 | -0.099 | |
P | S | 0.331 | 0.1 | 0.033 | -0.033 | 0.033 | |
P | T | 0.331 | 0.4 | 0.133 | -0.133 | 0.133 | |
S | R | 0.335 | 0.3 | 0.1 | -0.1 | 0.1 | |
S | P | 0.335 | 0.2 | 0.067 | 0.067 | -0.067 | |
S | T | 0.335 | 0.4 | 0.134 | 0.134 | -0.134 | |
Player1: 0.13 | Player2 : -0.13 | |
ROCK VS EQUIL2 | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
R | P | 1 | 0.335 | 0.335 | -0.335 | 0.335 | |
R | S | 1 | 0.333 | 0.333 | 0.333 | -0.333 | |
Player1: 0.0 | Player2 : 0.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
let random = new Random() | |
module Array = | |
let indexed a = Array.mapi (fun i x -> (i,x)) a | |
let selectColumn c a = | |
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a | |
module String = | |
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " " | |
let inline toupper (str:string) = str.ToUpper() | |
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s) | |
let buildTableRow (collens:_[]) (row:string[]) = | |
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | " | |
let makeTable newline headers title (table:string[][]) = | |
let hlen = Array.map String.length headers | |
let lens = table |> Array.map (Array.map (String.length)) | |
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|] | |
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline | |
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|] | |
|> Array.map (buildTableRow longest) | |
|> joinToStringWith newline | |
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0) | |
let cdf p = | |
p |> Array.fold (fun (total, list) v -> | |
let cd = v + total | |
cd, cd::list) (0., []) | |
|> snd | |
|> Array.ofList | |
let getDiscreteSampleFromCDF (pcdf:float[]) = | |
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1 | |
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx | |
abs(cummProb pcdlen - pcdlen) | |
let discreteSample p = cdf p |> getDiscreteSampleFromCDF | |
let inline pairop op (x,y) (u,v) = (op x u, op y v) | |
let round (n:int) (x:float) = Math.Round(x,n) | |
let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a | |
let getStrategy (strategySum:_[]) regretSum = | |
let strategy = Array.map (max 0.) regretSum | |
let nSum, numactions = Array.sum strategy, strategy.Length | |
let strategy' = | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategy | |
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a] | |
strategy' | |
let getAvgStrategy strategySum = | |
let nSum, numactions = Array.sum strategySum, strategySum.Length | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategySum | |
let getAction (actions:_[]) strategy = actions.[discreteSample strategy] | |
let train1 iters f actions actionDistr regretSum strategySum = | |
for _ in 0..iters - 1 do | |
let strategy = getStrategy strategySum regretSum | |
let otheraction = getAction actions actionDistr | |
let heroaction = getAction actions strategy | |
let heroutil,_ = f (heroaction,otheraction) | |
Array.iteri (fun a action -> | |
let altutil, _ = f (action,otheraction) | |
regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions | |
let train iters f actions regretSum regretSum2 strategySum strategySum2 = | |
for _ in 0..iters - 1 do | |
let strategy = getStrategy strategySum regretSum | |
let strategy2 = getStrategy strategySum2 regretSum2 | |
let otheraction = getAction actions strategy2 | |
let heroaction = getAction actions strategy | |
let heroutil,otherutil = f (heroaction,otheraction) | |
Array.iteri (fun a action -> | |
let altutil , _ = f (action,otheraction) | |
let altutil2, _ = f (action,heroaction) | |
regretSum.[a] <- regretSum.[a] + (altutil - heroutil) | |
regretSum2.[a] <- regretSum2.[a] + (altutil2 - otherutil)) actions | |
let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 = | |
let buildTable player = | |
[|for (move,p) in strat1 do | |
for (move2,p2) in strat2 -> | |
[|string move; string move2; string (round roundTableTo p) ; | |
string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""|] , | |
(player (winner (move,move2)) * (p * p2) , | |
player (winner (move2,move)) * (p * p2))|] | |
let table = buildTable fst | |
let tabledisp = Array.filter (fun (_,(p,_)) -> keepZeroes || round roundProbTo p <> 0.) table | |
|> Array.map (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u) | |
row.[row.Length - 1] <- string (round roundTableTo u2); row) | |
let astable = makeTable "\n" [|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"|] title tabledisp | |
printfn "\n%s\n\nPlayer1: %A | Player2 : %A" | |
astable (Array.sumBy (snd >> fst) (table) |> round roundProbTo) | |
(Array.sumBy (snd >> snd) (table) |> round roundProbTo) | |
let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2 | |
let inline printStrategy2 title n strat = | |
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat | |
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows | |
printfn "\n%s" astable | |
let inline printStrategy title strat = printStrategy2 title 3 strat | |
let simulatePlay dodisp rounds bet winner strat1 strat2 = | |
let results = | |
[|for _ in 0..rounds - 1 -> | |
let move1 = strat1 |> Array.unzip ||> getAction | |
let move2 = strat2 |> Array.unzip ||> getAction | |
let res1, res2 = winner (move1, move2) | |
res1 * bet, res2 * bet|] | |
let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results | |
if dodisp then printfn "Player1 wins: %A | Player2 wins: %A" p1win p2win | |
p1win,p2win | |
////////////////////////////// | |
let reset (a:_[]) = for i in 0..a.Length - 1 do a.[i] <- 0. | |
type Moves2 = R | P | S | T | |
//The numbers control the utilities for the game. Ensure left and right sum to 0. | |
let winner2 = | |
function | |
| (R,R) -> ( 0.,0. ) | |
| (P,R) -> ( 1.,-1.) | |
| (S,R) -> (-1.,1. ) | |
| (T,R) -> (1.,-1. ) | |
| (R,P) -> (-1.,1. ) | |
| (P,P) -> ( 0.,0. ) | |
| (S,P) -> ( 1.,-1.) | |
| (T,P) -> ( 1.,-1.) | |
| (R,S) -> ( 1.,-1.) | |
| (P,S) -> (-1.,1. ) | |
| (S,S) -> ( 0.,0. ) | |
| (T,S) -> ( -1.,1.) | |
| (R,T) -> (-1.,1. ) | |
| (P,T) -> (-1.,1. ) | |
| (S,T) -> (1.,-1. ) | |
| (T,T) -> (0.,0. ) | |
//The numbers control the utilities for the game. Ensure left and right sum to 0. | |
let winner3 = | |
function | |
| (R,R) -> ( 0.,0. ) | |
| (P,R) -> ( 1.,-1.) | |
| (S,R) -> (-1.,1. ) | |
//This essentially flips a coin but can change the number to <= 1. | |
| (T,R) -> if random.NextDouble() < 0.5 then (1.,-1. ) else (-1.,1. ) | |
| (R,P) -> (-1.,1. ) | |
| (P,P) -> ( 0.,0. ) | |
| (S,P) -> ( 1.,-1.) | |
//roll a die with sides 4 or less as wins. | |
| (T,P) -> if random.NextDouble() < 4./6. then (1.,-1. ) else (-1.,1. ) | |
| (R,S) -> ( 1.,-1.) | |
| (P,S) -> (-1.,1. ) | |
| (S,S) -> ( 0.,0. ) | |
| (T,S) -> (-1.,1.) | |
| (R,T) -> if random.NextDouble() < 0.5 then (-1.,1. ) else (1.,-1. ) | |
| (P,T) -> if random.NextDouble() < 4./6. then (-1.,1. ) else (1.,-1. ) | |
| (S,T) -> (1.,-1.) | |
| (T,T) -> (0.,0. ) | |
//--------------------------- | |
let regretSumX = [|0.;0.;0.;0.|] | |
let strategySumX = [|0.;0.;0.;0.|] | |
let regretSum2X = [|0.;0.;0.;0.|] | |
let strategySum2X = [|0.;0.;0.;0.|] | |
let rpstStratDef = Array.zip [|R;P;S;T|] [|0.4;0.4;0.1;0.1|] //<== These numbers can be changed. Ensure sums to 1. | |
let rpstStratDef2 = Array.zip [|R;P;S;T|] [|0.3;0.2;0.1;0.4|] //<== These numbers can be changed. Ensure sums to 1. | |
let purePaper = Array.zip [|R;P;S;T|] [|0.;1.;0.;0.|] | |
let pureRock = Array.zip [|R;P;S;T|] [|1.;0.;0.;0.|] | |
let pureTelephone = Array.zip [|R;P;S;T|] [|0.;0.;0.;1.|] | |
let winnerFunction = winner3 //<== Change the name of this function to either *winner2* or *winner3* to control what is learnt | |
train1 200_000 winnerFunction [|R;P;S;T|] (Array.map snd rpstStratDef) regretSumX strategySumX | |
let rpstStrat1 = Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX) | |
printStrategy "Learned Strategy" rpstStrat1 | |
displayTable "Learned Strategy vs Fixed Adversary" winnerFunction rpstStrat1 rpstStratDef //note that displayTable probabilities and expected values aren't quite correct for winner3 | |
let avg = [|for _ in 0..99 -> simulatePlay false 10 5. winnerFunction rpstStrat1 rpstStratDef|] |> Array.averageBy fst | |
printfn "\n~Strategy EV (10 rounds, $5/round)\n[π°,100%%] vs [π,40%% ;π°, 40%%; β, 10%%; π ,10%%] = %A" avg | |
//--------------------------- | |
reset regretSumX; reset strategySumX | |
train 200_000 winnerFunction [|R;P;S;T|] regretSumX regretSum2X strategySumX strategySum2X | |
let rpstStrat1b, rpstStrat2 = | |
Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX) , | |
Array.zip [|R;P;S;T|] (getAvgStrategy strategySum2X) | |
printStrategy "Equil1" rpstStrat1b | |
printStrategy "Equil2" rpstStrat2 | |
displayTable "Equil" winnerFunction rpstStrat1b rpstStrat2 | |
displayTable "equil1 vs adv" winnerFunction rpstStrat1b rpstStratDef2 | |
displayTable "rock vs equil2" winnerFunction pureRock rpstStrat2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
LEARNED STRATEGY
LEARNED STRATEGY VS FIXED ADVERSARY
Player1: 0.2 | Player2 : -0.2
~Strategy EV (10 rounds, $5/round)
[:newspaper:,100%] vs [:fist:,40% ;:newspaper:, 40%; :scissors:, 10%; :telephone:,10%] = 12.0
EQUIL1
EQUIL2
EQUIL
Player1: 0.0 | Player2 : 0.0
EQUIL1 VS ADV
Player1: 0.13 | Player2 : -0.13
ROCK VS EQUIL2
Player1: 0.0 | Player2 : 0.0