Created
September 13, 2017 05:39
-
-
Save sir-deenicus/2ac6a2a064f4a82e09d81e60156d8915 to your computer and use it in GitHub Desktop.
Regret Minimization on Least Unique Integer Game
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Attempt at Nash | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 0.501 | |
1 | 0.251 | |
2 | 0.125 | |
3 | 0.063 | |
4 | 0.03 | |
5 | 0.03 | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 0.501 | |
1 | 0.251 | |
2 | 0.125 | |
3 | 0.063 | |
4 | 0.03 | |
5 | 0.03 | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 0.501 | |
1 | 0.251 | |
2 | 0.125 | |
3 | 0.063 | |
4 | 0.03 | |
5 | 0.03 | |
===================== | |
Regret Matching on each other`s actions | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 1 | |
1 | 0 | |
2 | 0 | |
3 | 0 | |
4 | 0 | |
5 | 0 | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 0.6 | |
1 | 0.4 | |
2 | 0 | |
3 | 0 | |
4 | 0 | |
5 | 0 | |
GUESS STRATEGY | |
Move | Move Probability | |
---- | ---------------- | |
0 | 0 | |
1 | 0.333 | |
2 | 0.333 | |
3 | 0.333 | |
4 | 0 | |
5 | 0 | |
========= | |
Winrate of Strategy 1: 0.2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
let random = new Random() | |
let joinpair f (a,b) = f a b | |
let keepLeft f (x,y) = x, f y | |
let flip f a b = f b a | |
module Array = | |
let indexed a = Array.mapi (fun i x -> (i,x)) a | |
let selectColumn c a = | |
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a | |
let inline normalizeWeights (a: ('a * 'b) []) = | |
let tot = Array.sumBy snd a | |
Array.map (keepLeft (flip (/) tot)) a | |
module String = | |
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " " | |
let inline toupper (str:string) = str.ToUpper() | |
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s) | |
let buildTableRow (collens:_[]) (row:string[]) = | |
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | " | |
let makeTable newline headers title (table:string[][]) = | |
let hlen = Array.map String.length headers | |
let lens = table |> Array.map (Array.map (String.length)) | |
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|] | |
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline | |
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|] | |
|> Array.map (buildTableRow longest) | |
|> joinToStringWith newline | |
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0) | |
let cdf p = | |
p |> Array.fold (fun (total, list) v -> | |
let cd = v + total | |
cd, cd::list) (0., []) | |
|> snd | |
|> Array.ofList | |
let getDiscreteSampleFromCDF (pcdf:float[]) = | |
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1 | |
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx | |
abs(cummProb pcdlen - pcdlen) | |
let discreteSample p = cdf p |> getDiscreteSampleFromCDF | |
let round (n:int) (x:float) = Math.Round(x,n) | |
let getStrategy (strategySum:_[]) regretSum = | |
let strategy = Array.map (max 0.) regretSum | |
let nSum, numactions = Array.sum strategy, strategy.Length | |
let strategy' = | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategy | |
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a] | |
strategy' | |
let getAvgStrategy strategySum = | |
let nSum, numactions = Array.sum strategySum, strategySum.Length | |
if nSum <= 0. then | |
Array.create numactions (1./float numactions) | |
else Array.map (fun x -> x/nSum) strategySum | |
let getAction (actions:_[]) strategy = actions.[discreteSample strategy] | |
let inline printStrategy2 title n strat = | |
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat | |
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows | |
printfn "\n%s" astable | |
let inline printStrategy title strat = printStrategy2 title 3 strat | |
////////////////////////////// | |
let rateOutcomeForFirst (a,b,c) = | |
if a = b && a = c && b = c then 0. | |
elif a < b && a < c then 2. | |
elif a <> b && b = c then 2. | |
else -1. | |
let trainstep iters util actions stratsList = | |
for _ in 0..iters - 1 do | |
let strats = Array.map (fun (s,r) -> getStrategy s r) stratsList | |
let acts = Array.map (getAction actions) strats | |
let heroutil = util (acts.[0],acts.[1],acts.[2]) | |
let otherutil = util (acts.[1],acts.[0],acts.[2]) | |
let otherutil2 = util (acts.[2],acts.[0],acts.[1]) | |
let utils = [|heroutil;otherutil;otherutil2|] | |
let jointRegretSum = Array.map snd stratsList | |
Array.iteri (fun a action -> | |
let altutil:float = util (action,acts.[1],acts.[2]) | |
let altutil2 = util (action,acts.[0],acts.[2]) | |
let altutil3 = util (action,acts.[0],acts.[1]) | |
let altutils = [|altutil;altutil2;altutil3|] | |
for i in 0..2 do | |
jointRegretSum.[i].[a] <- jointRegretSum.[i].[a] + (altutils.[i] - utils.[i])) actions | |
let actions = [|0..5|] | |
let createStrats = Array.init 3 (fun _ -> Array.create actions.Length 0., Array.create actions.Length 0.) | |
let createStrats2 = Array.create 3 (Array.create actions.Length 0., Array.create actions.Length 0.) | |
trainstep 50_000 rateOutcomeForFirst actions createStrats | |
trainstep 20_000 rateOutcomeForFirst actions createStrats2 | |
let naiveGuesser = [|0.7;0.15;0.1;0.0;0.0;0.05|] | |
printfn "\nAttempt at Nash" | |
Array.iter (fun (s,_) -> Array.zip actions (getAvgStrategy s) |> printStrategy "Guess Strategy") createStrats2 | |
printfn "=====================" | |
printfn "\nRegret Matching on each other`s actions" | |
Array.iter (fun (s,_) -> Array.zip actions (getAvgStrategy s) |> printStrategy "Guess Strategy") createStrats | |
let arrayToTriple (x:_[]) = x.[0],x.[1],x.[2] | |
let swap3b (a,b,c) = b,a,c | |
let swap3c (a,b,c) = c,a,b | |
let runRound () = createStrats2 | |
|> Array.mapi (fun i (s,_) -> | |
if i > 0 then discreteSample naiveGuesser // <== CHANGE the 0 to a 1 or 3 or 2 | |
else (actions, (getAvgStrategy s)) ||> getAction ) | |
|> arrayToTriple | |
|> id // ** <== CHANGE to swap3b** | |
|> rateOutcomeForFirst | |
[|for _ in 0..999 -> runRound()|] | |
|> Array.groupBy id | |
|> Array.map (keepLeft (Array.length>>float)) | |
|> Array.normalizeWeights | |
|> Array.sumBy (joinpair ( * )) | |
|> round 2 | |
|> printfn "\n=========\nWinrate of Strategy 1: %A" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Attempt at Nash
GUESS STRATEGY
GUESS STRATEGY
GUESS STRATEGY
Regret Matching on each other`s actions
GUESS STRATEGY
GUESS STRATEGY
GUESS STRATEGY
=========
Winrate of Strategy 1: 0.2