Last active
September 13, 2017 05:35
-
-
Save sir-deenicus/e4e19f9a01fb224fecd3ebdcd02c3f71 to your computer and use it in GitHub Desktop.
Regret Minimization. Nash Equilibrium for Rock Paper Scissors
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
STRATEGY VS STATIC ADVERSARY | |
Move | Move Probability | |
-------- | ---------------- | |
Rock | 0 | |
Paper | 1 | |
Scissors | 0 | |
STRATEGY VS STATIC ADVERSARY | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
Paper | Rock | 1 | 0.7 | 0.7 | 0.7 | -0.7 | |
Paper | Scissors | 1 | 0.2 | 0.2 | -0.2 | 0.2 | |
Player1: 0.5 | Player2 : -0.5 | |
STATIC ADVERSARY2 VS STATIC ADVERSARY1 | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
Rock | Paper | 0.2 | 0.1 | 0.02 | -0.02 | 0.02 | |
Rock | Scissors | 0.2 | 0.2 | 0.04 | 0.04 | -0.04 | |
Paper | Rock | 0.7 | 0.7 | 0.49 | 0.49 | -0.49 | |
Paper | Scissors | 0.7 | 0.2 | 0.14 | -0.14 | 0.14 | |
Scissors | Rock | 0.1 | 0.7 | 0.07 | -0.07 | 0.07 | |
Scissors | Paper | 0.1 | 0.1 | 0.01 | 0.01 | -0.01 | |
Player1: 0.31 | Player2 : -0.31 | |
STATIC ADVERSARY1 VS MIXED STRATEGY | |
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util | |
-------- | -------- | ------ | ------ | ---------- | ------------- | ------------- | |
Rock | Paper | 0.2 | 0.333 | 0.067 | -0.067 | 0.067 | |
Rock | Scissors | 0.2 | 0.333 | 0.067 | 0.067 | -0.067 | |
Paper | Rock | 0.7 | 0.333 | 0.233 | 0.233 | -0.233 | |
Paper | Scissors | 0.7 | 0.333 | 0.233 | -0.233 | 0.233 | |
Scissors | Rock | 0.1 | 0.333 | 0.033 | -0.033 | 0.033 | |
Scissors | Paper | 0.1 | 0.333 | 0.033 | 0.033 | -0.033 | |
Player1: 0.0 | Player2 : 0.0 | |
Strategy vs Adversary 100 rounds of 5$/rounds | |
Player1 wins: 210.0 | Player2 wins: -210.0 | |
Player1=Mixed Strategy vs Adversary 100 rounds of 5$/rounds | |
Player1 wins: 5.0 | Player2 wins: -5.0 | |
[π°, 100%] vs [π,70% ;π°, 10%; β, 20%]=249.425 | |
[π,70% ;π°, 10%; β, 20%] vs [π,1/3 ;π°, 1/3; β, 1/3 ] = 0.175 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
let random = new Random() | |
module Array = | |
let indexed a = Array.mapi (fun i x -> (i,x)) a | |
let selectColumn c a = | |
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a | |
module String = | |
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " " | |
let inline toupper (str:string) = str.ToUpper() | |
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s) | |
let buildTableRow (collens:_[]) (row:string[]) = | |
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | " | |
let makeTable newline headers title (table:string[][]) = | |
let hlen = Array.map String.length headers | |
let lens = table |> Array.map (Array.map (String.length)) | |
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|] | |
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline | |
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|] | |
|> Array.map (buildTableRow longest) | |
|> joinToStringWith newline | |
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0) | |
let cdf p = | |
p |> Array.fold (fun (total, list) v -> | |
let cd = v + total | |
cd, cd::list) (0., []) | |
|> snd | |
|> Array.ofList | |
let getDiscreteSampleFromCDF (pcdf:float[]) = | |
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1 | |
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx | |
abs(cummProb pcdlen - pcdlen) | |
let discreteSample p = cdf p |> getDiscreteSampleFromCDF | |
let inline pairop op (x,y) (u,v) = (op x u, op y v) | |
let round (n:int) (x:float) = Math.Round(x,n) | |
let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a | |
let getStrategy (strategySum:_[]) regretSum = | |
let strategy = Array.map (max 0.) regretSum | |
let nSum, numactions = Array.sum strategy, strategy.Length | |
let strategy' = | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategy | |
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a] | |
strategy' | |
let getAvgStrategy strategySum = | |
let nSum, numactions = Array.sum strategySum, strategySum.Length | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategySum | |
let getAction (actions:_[]) strategy = actions.[discreteSample strategy] | |
let train1 iters f actions actionDistr regretSum strategySum = | |
for _ in 0..iters - 1 do | |
let strategy = getStrategy strategySum regretSum | |
let otheraction = getAction actions actionDistr | |
let heroaction = getAction actions strategy | |
let heroutil,_ = f (heroaction,otheraction) | |
Array.iteri (fun a action -> | |
let altutil, _ = f (action,otheraction) | |
regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions | |
let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 = | |
let buildTable player = | |
[|for (move,p) in strat1 do | |
for (move2,p2) in strat2 -> | |
[|string move; string move2; string (round roundTableTo p) ; | |
string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""|] , | |
(player (winner (move,move2)) * (p * p2) , | |
player (winner (move2,move)) * (p * p2))|] | |
let table = buildTable fst | |
let tabledisp = Array.filter (fun (_,(p,_)) -> keepZeroes || round roundProbTo p <> 0.) table | |
|> Array.map (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u) | |
row.[row.Length - 1] <- string (round roundTableTo u2); row) | |
let astable = makeTable "\n" [|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"|] title tabledisp | |
printfn "%s\n\nPlayer1: %A | Player2 : %A" | |
astable (Array.sumBy (snd >> fst) (table) |> round roundProbTo) | |
(Array.sumBy (snd >> snd) (table) |> round roundProbTo) | |
let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2 | |
let inline printStrategy2 title n strat = | |
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat | |
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows | |
printfn "%s" astable | |
let inline printStrategy title strat = printStrategy2 title 3 strat | |
let simulatePlay dodisp rounds bet winner strat1 strat2 = | |
let results = | |
[|for _ in 0..rounds - 1 -> | |
let move1 = strat1 |> Array.unzip ||> getAction | |
let move2 = strat2 |> Array.unzip ||> getAction | |
let res1, res2 = winner (move1, move2) | |
res1 * bet, res2 * bet|] | |
let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results | |
if dodisp then printfn "Player1 wins: %A | Player2 wins: %A" p1win p2win | |
p1win,p2win | |
////////////////////////////// | |
type Moves = Rock | Paper | Scissors | |
let winner = | |
function | |
| (Rock, Rock) -> ( 0.,0. ) | |
| (Paper, Rock) -> ( 1.,-1.) | |
| (Scissors, Rock) -> (-1.,1. ) | |
| (Rock, Paper) -> (-1.,1. ) | |
| (Paper, Paper) -> ( 0.,0. ) | |
| (Scissors, Paper) -> ( 1.,-1.) | |
| (Rock, Scissors) -> ( 1.,-1.) | |
| (Paper, Scissors) -> (-1.,1. ) | |
| (Scissors, Scissors) -> ( 0.,0. ) | |
let regretSum = [|0.;0.;0.|] | |
let strategySum = [|0.;0.;0.|] | |
let rpsStratDef = Array.zip [|Rock;Paper;Scissors|] [|0.7;0.1;0.2|] //<== These numbers can be changed. Ensure sums to 1. | |
let rpsStratDef2 = Array.zip [|Rock;Paper;Scissors|] [|0.2;0.7;0.1|] //<== These numbers can be changed. Ensure sums to 1. | |
let rpsNE = Array.zip [|Rock;Paper;Scissors|] [|1./3.;1./3.;1./3.|] | |
// ___________ | |
// | | | |
//can change this \|/ | |
train1 20_000 winner [|Rock;Paper;Scissors|] (Array.map snd rpsStratDef) regretSum strategySum | |
let rpsStrat1 = Array.zip [|Rock;Paper;Scissors|] (getAvgStrategy strategySum) | |
printStrategy "Strategy vs Static Adversary" rpsStrat1 | |
displayTable "Strategy vs Static Adversary" winner rpsStrat1 rpsStratDef | |
displayTable "Static Adversary2 vs Static Adversary1" winner rpsStratDef2 rpsStratDef | |
displayTable "Static Adversary1 vs Mixed Strategy" winner rpsStratDef2 rpsNE | |
printfn "Strategy vs Adversary 100 rounds of 5$/rounds" | |
simulatePlay true 100 5. winner rpsStrat1 rpsStratDef | |
printfn "Player1=Mixed Strategy vs Adversary 100 rounds of 5$/rounds" | |
simulatePlay true 100 5. winner rpsStratDef rpsNE | |
let playstats = [|for _ in 0..999 -> simulatePlay false 100 5. winner rpsStrat1 rpsStratDef|] | |
printfn "[π°, 100%%] vs [π,70%% ;π°, 10%%; β, 20%%]=%A" (Array.averageBy fst playstats) | |
let playstatsb = [|for _ in 0..999 -> simulatePlay false 100 5. winner rpsStratDef rpsNE|] | |
printfn "\n[π,70%% ;π°, 10%%; β, 20%%] vs [π,1/3 ;π°, 1/3; β, 1/3 ] = %A" (Array.averageBy fst playstatsb) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
STRATEGY VS STATIC ADVERSARY
STRATEGY VS STATIC ADVERSARY
Player1: 0.5 | Player2 : -0.5
STATIC ADVERSARY2 VS STATIC ADVERSARY1
Player1: 0.31 | Player2 : -0.31
STATIC ADVERSARY1 VS MIXED STRATEGY
Player1: 0.0 | Player2 : 0.0
Strategy vs Adversary 100 rounds of $5/rounds
Player1 wins: 210.0 | Player2 wins: -210.0
Player1=Mixed Strategy vs Adversary 100 rounds of $5/rounds
Player1 wins: 5.0 | Player2 wins: -5.0
[:newspaper:, 100%] vs [:fist:,70% ;:newspaper:, 10%; :scissors:, 20%]=249.425
[:fist:,70% ;:newspaper:, 10%; :scissors:, 20%] vs [:fist:,1/3 ;:newspaper:, 1/3; :scissors:, 1/3 ] = 0.175