sir-deenicus · September 13, 2017 23:55 · sir-deenicus · Sep 13, 2017
diff --git a/rps2 output b/rps2 output
 LEARNED STRATEGY

 Move | Move Probability
 ---- | ----------------
 R    | 0               
 P    | 0.999           
 S    | 0               
 T    | 0               

 LEARNED STRATEGY VS FIXED ADVERSARY

 Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
 -------- | -------- | ------ | ------ | ---------- | ------------- | -------------
 P        | R        | 0.999  | 0.4    | 0.399      | 0.399         | -0.399       
 P        | S        | 0.999  | 0.1    | 0.1        | -0.1          | 0.1          
 P        | T        | 0.999  | 0.1    | 0.1        | -0.1          | 0.1          

 Player1: 0.2 | Player2 : -0.2

 ~Strategy EV (10 rounds, $5/round)
 [📰,100%] vs [👊,40% ;📰, 40%; ✂, 10%;  📠,10%] = 12.0

 EQUIL1

 Move | Move Probability
 ---- | ----------------
 R    | 0.332           
 P    | 0.333           
 S    | 0.335           
 T    | 0               

 EQUIL2

 Move | Move Probability
 ---- | ----------------
 R    | 0.333           
 P    | 0.335           
 S    | 0.333           
 T    | 0               

 EQUIL

 Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
 -------- | -------- | ------ | ------ | ---------- | ------------- | -------------
 R        | P        | 0.332  | 0.335  | 0.111      | -0.111        | 0.111        
 R        | S        | 0.332  | 0.333  | 0.11       | 0.11          | -0.11        
 P        | R        | 0.333  | 0.333  | 0.111      | 0.111         | -0.111       
 P        | S        | 0.333  | 0.333  | 0.111      | -0.111        | 0.111        
 S        | R        | 0.335  | 0.333  | 0.111      | -0.111        | 0.111        
 S        | P        | 0.335  | 0.335  | 0.112      | 0.112         | -0.112       

 Player1: 0.0 | Player2 : 0.0

 EQUIL1 VS ADV

 Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
 -------- | -------- | ------ | ------ | ---------- | ------------- | -------------
 R        | P        | 0.334  | 0.2    | 0.067      | -0.067        | 0.067        
 R        | S        | 0.334  | 0.1    | 0.033      | 0.033         | -0.033       
 R        | T        | 0.334  | 0.4    | 0.134      | 0.134         | -0.134       
 P        | R        | 0.331  | 0.3    | 0.099      | 0.099         | -0.099       
 P        | S        | 0.331  | 0.1    | 0.033      | -0.033        | 0.033        
 P        | T        | 0.331  | 0.4    | 0.133      | -0.133        | 0.133        
 S        | R        | 0.335  | 0.3    | 0.1        | -0.1          | 0.1          
 S        | P        | 0.335  | 0.2    | 0.067      | 0.067         | -0.067       
 S        | T        | 0.335  | 0.4    | 0.134      | 0.134         | -0.134       

 Player1: 0.13 | Player2 : -0.13

 ROCK VS EQUIL2

 Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
 -------- | -------- | ------ | ------ | ---------- | ------------- | -------------
 R        | P        | 1      | 0.335  | 0.335      | -0.335        | 0.335        
 R        | S        | 1      | 0.333  | 0.333      | 0.333         | -0.333       

 Player1: 0.0 | Player2 : 0.0
diff --git a/rps2.fsx b/rps2.fsx
 open System

 let random = new Random()  

 module Array =
   let indexed a = Array.mapi (fun i x -> (i,x)) a
   let selectColumn c a = 
       Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a  

 module String = 
   let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " "
   let inline toupper (str:string) = str.ToUpper()
   

 let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s)

 let buildTableRow (collens:_[]) (row:string[]) =
       row |> Array.mapi (fun i s ->  String.pad collens.[i] s) |> joinToStringWith " | "


 let makeTable newline headers title (table:string[][]) =
       let hlen = Array.map String.length headers

       let lens = table |> Array.map (Array.map (String.length))

       let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|]

       let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline

       let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|] 
                  |> Array.map (buildTableRow longest) 
                  |> joinToStringWith newline
       String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0)


 let cdf p = 
    p |> Array.fold (fun (total, list) v -> 
                let cd = v + total
                cd, cd::list) (0., []) 
      |> snd 
      |> Array.ofList 

 let getDiscreteSampleFromCDF (pcdf:float[]) = 
    let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1
    
    let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx
    
    abs(cummProb pcdlen - pcdlen) 

 let discreteSample p = cdf p |> getDiscreteSampleFromCDF  

 let inline pairop op (x,y) (u,v) = (op x u, op y v)

 let round (n:int) (x:float) = Math.Round(x,n)                                                                               
                         
 let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a 

 let getStrategy (strategySum:_[]) regretSum =
  let strategy         = Array.map (max 0.)  regretSum 
  let nSum, numactions = Array.sum strategy, strategy.Length
  
  let strategy' = 
      if nSum <= 0. then 
           Array.create numactions (1./float numactions) 
      else Array.map (fun x -> x/nSum) strategy
  
  for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a]
  strategy'


 let getAvgStrategy strategySum =  
  let nSum, numactions = Array.sum strategySum, strategySum.Length           
  if  nSum <= 0. then 
       Array.create numactions (1./float numactions) 
  else Array.map (fun x -> x/nSum) strategySum
   

 let getAction (actions:_[]) strategy = actions.[discreteSample strategy]


 let train1 iters f actions actionDistr regretSum strategySum =
    for _ in 0..iters - 1 do
        let strategy = getStrategy strategySum regretSum
            
        let otheraction = getAction actions actionDistr
        let heroaction  = getAction actions strategy

        let heroutil,_ = f (heroaction,otheraction)

        Array.iteri (fun a action ->  
              let altutil, _ = f (action,otheraction)              
              regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions
     

 let train iters f actions regretSum regretSum2 strategySum strategySum2 =
    for _ in 0..iters - 1 do
        let strategy  = getStrategy strategySum regretSum
        let strategy2 = getStrategy strategySum2 regretSum2
    
        let otheraction = getAction actions strategy2
        let heroaction  = getAction actions strategy

        let heroutil,otherutil = f (heroaction,otheraction)

        Array.iteri (fun a action ->  
              let altutil , _ = f (action,otheraction)
              let altutil2, _ = f (action,heroaction)

              regretSum.[a]  <- regretSum.[a]  + (altutil - heroutil)
              regretSum2.[a] <- regretSum2.[a] + (altutil2 - otherutil)) actions


 let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 = 
    let buildTable player =
        [|for (move,p) in strat1 do
           for (move2,p2) in strat2 ->
               [|string move; string move2; string (round roundTableTo p) ; 
                 string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""|] , 
               (player (winner (move,move2)) * (p * p2) , 
                player (winner (move2,move)) * (p * p2))|]

    let table = buildTable fst    
    let tabledisp =    Array.filter (fun (_,(p,_))   -> keepZeroes || round roundProbTo p <> 0.) table 
                    |> Array.map    (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u)
                                                         row.[row.Length - 1] <- string (round roundTableTo u2); row)   
                                                     
    let astable = makeTable "\n" [|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"|] title tabledisp
    
    printfn "\n%s\n\nPlayer1: %A | Player2 : %A" 
             astable (Array.sumBy (snd >> fst) (table) |> round roundProbTo) 
                     (Array.sumBy (snd >> snd) (table) |> round roundProbTo)


 let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2 


 let inline printStrategy2 title n strat = 
      let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat      
      let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows      
      
      printfn "\n%s" astable


 let inline printStrategy title strat = printStrategy2 title 3 strat 


 let simulatePlay dodisp rounds bet winner strat1 strat2 =
    let results = 
        [|for _ in 0..rounds - 1 ->
              let move1 = strat1 |> Array.unzip ||> getAction  
              let move2 = strat2 |> Array.unzip ||> getAction 
    
              let res1, res2 = winner (move1, move2)
              res1 * bet, res2 * bet|]

    let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results
    if dodisp then printfn "Player1 wins: %A | Player2 wins: %A" p1win p2win
    p1win,p2win
    

 //////////////////////////////              

 let reset (a:_[]) = for i in 0..a.Length - 1 do a.[i] <- 0.   

 type Moves2 = R | P | S | T

 //The numbers control the utilities for the game. Ensure left and right sum to 0.
 let winner2 =
    function 
      | (R,R) -> ( 0.,0. )
      | (P,R) -> ( 1.,-1.) 
      | (S,R) -> (-1.,1. )
    
      | (T,R) -> (1.,-1. )
    
      | (R,P) -> (-1.,1. )
      | (P,P) -> ( 0.,0. )
      | (S,P) -> ( 1.,-1.)
      
      | (T,P) -> ( 1.,-1.)
      
      | (R,S) -> ( 1.,-1.)
      | (P,S) -> (-1.,1. )
      | (S,S) -> ( 0.,0. ) 
    
      | (T,S) -> ( -1.,1.) 
     
      | (R,T) -> (-1.,1. )
      | (P,T) -> (-1.,1. )
      | (S,T) -> (1.,-1. )
      | (T,T) -> (0.,0.  )

 //The numbers control the utilities for the game. Ensure left and right sum to 0.
 let winner3 =
    function 
      | (R,R) -> ( 0.,0. )
      | (P,R) -> ( 1.,-1.) 
      | (S,R) -> (-1.,1. )
                    //This essentially flips a coin but can change the number to <= 1.
      | (T,R) -> if random.NextDouble() < 0.5 then (1.,-1. ) else (-1.,1. )
    
      | (R,P) -> (-1.,1. )
      | (P,P) -> ( 0.,0. )
      | (S,P) -> ( 1.,-1.)
                 //roll a die with sides 4 or less as wins.
      | (T,P) -> if random.NextDouble() < 4./6. then (1.,-1. ) else (-1.,1. )
      
      | (R,S) -> ( 1.,-1.)
      | (P,S) -> (-1.,1. )
      | (S,S) -> ( 0.,0. ) 
    
      | (T,S) -> (-1.,1.)
     
      | (R,T) -> if random.NextDouble() < 0.5 then (-1.,1. ) else (1.,-1. )
      | (P,T) -> if random.NextDouble() < 4./6. then (-1.,1. ) else (1.,-1. )
      | (S,T) -> (1.,-1.)
      | (T,T) -> (0.,0. )
 //---------------------------

 let regretSumX   = [|0.;0.;0.;0.|]
 let strategySumX = [|0.;0.;0.;0.|]

 let regretSum2X   = [|0.;0.;0.;0.|]
 let strategySum2X = [|0.;0.;0.;0.|]
    
 let rpstStratDef  = Array.zip [|R;P;S;T|] [|0.4;0.4;0.1;0.1|] //<== These numbers can be changed. Ensure sums to 1.
 let rpstStratDef2 = Array.zip [|R;P;S;T|] [|0.3;0.2;0.1;0.4|] //<== These numbers can be changed. Ensure sums to 1. 
 let purePaper     = Array.zip [|R;P;S;T|] [|0.;1.;0.;0.|]   
 let pureRock      = Array.zip [|R;P;S;T|] [|1.;0.;0.;0.|]   
 let pureTelephone = Array.zip [|R;P;S;T|] [|0.;0.;0.;1.|]   

 let winnerFunction = winner3 //<== Change the name of this function to either *winner2* or *winner3* to control what is learnt

 train1 200_000 winnerFunction [|R;P;S;T|] (Array.map snd rpstStratDef) regretSumX strategySumX 

 let rpstStrat1 = Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX)   
 printStrategy "Learned Strategy" rpstStrat1     

 displayTable "Learned Strategy vs Fixed Adversary" winnerFunction rpstStrat1 rpstStratDef //note that displayTable probabilities and expected values aren't quite correct for winner3 

 let avg = [|for _ in 0..99 -> simulatePlay false 10 5. winnerFunction rpstStrat1 rpstStratDef|] |> Array.averageBy fst    
 printfn "\n~Strategy EV (10 rounds, $5/round)\n[📰,100%%] vs [👊,40%% ;📰, 40%%; ✂, 10%%;  📠,10%%] = %A" avg


 //---------------------------

 reset regretSumX; reset strategySumX                  
 train 200_000 winnerFunction [|R;P;S;T|] regretSumX regretSum2X strategySumX strategySum2X

 let rpstStrat1b, rpstStrat2 = 
    Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX) ,
    Array.zip [|R;P;S;T|] (getAvgStrategy strategySum2X) 

 printStrategy "Equil1" rpstStrat1b
 printStrategy "Equil2" rpstStrat2

 displayTable "Equil" winnerFunction rpstStrat1b rpstStrat2

 displayTable "equil1 vs adv" winnerFunction rpstStrat1b rpstStratDef2 
 displayTable "rock vs equil2"  winnerFunction pureRock rpstStrat2
	LEARNED STRATEGY

	Move \| Move Probability
	---- \| ----------------
	R \| 0
	P \| 0.999
	S \| 0
	T \| 0

	LEARNED STRATEGY VS FIXED ADVERSARY

	Player 1 \| Player 2 \| Prob 1 \| Prob 2 \| Joint Prob \| Player 1 Util \| Player 2 Util
	-------- \| -------- \| ------ \| ------ \| ---------- \| ------------- \| -------------
	P \| R \| 0.999 \| 0.4 \| 0.399 \| 0.399 \| -0.399
	P \| S \| 0.999 \| 0.1 \| 0.1 \| -0.1 \| 0.1
	P \| T \| 0.999 \| 0.1 \| 0.1 \| -0.1 \| 0.1

	Player1: 0.2 \| Player2 : -0.2

	~Strategy EV (10 rounds, $5/round)
	[📰,100%] vs [👊,40% ;📰, 40%; ✂, 10%; 📠,10%] = 12.0

	EQUIL1

	Move \| Move Probability
	---- \| ----------------
	R \| 0.332
	P \| 0.333
	S \| 0.335
	T \| 0

	EQUIL2

	Move \| Move Probability
	---- \| ----------------
	R \| 0.333
	P \| 0.335
	S \| 0.333
	T \| 0

	EQUIL

	Player 1 \| Player 2 \| Prob 1 \| Prob 2 \| Joint Prob \| Player 1 Util \| Player 2 Util
	-------- \| -------- \| ------ \| ------ \| ---------- \| ------------- \| -------------
	R \| P \| 0.332 \| 0.335 \| 0.111 \| -0.111 \| 0.111
	R \| S \| 0.332 \| 0.333 \| 0.11 \| 0.11 \| -0.11
	P \| R \| 0.333 \| 0.333 \| 0.111 \| 0.111 \| -0.111
	P \| S \| 0.333 \| 0.333 \| 0.111 \| -0.111 \| 0.111
	S \| R \| 0.335 \| 0.333 \| 0.111 \| -0.111 \| 0.111
	S \| P \| 0.335 \| 0.335 \| 0.112 \| 0.112 \| -0.112

	Player1: 0.0 \| Player2 : 0.0

	EQUIL1 VS ADV

	Player 1 \| Player 2 \| Prob 1 \| Prob 2 \| Joint Prob \| Player 1 Util \| Player 2 Util
	-------- \| -------- \| ------ \| ------ \| ---------- \| ------------- \| -------------
	R \| P \| 0.334 \| 0.2 \| 0.067 \| -0.067 \| 0.067
	R \| S \| 0.334 \| 0.1 \| 0.033 \| 0.033 \| -0.033
	R \| T \| 0.334 \| 0.4 \| 0.134 \| 0.134 \| -0.134
	P \| R \| 0.331 \| 0.3 \| 0.099 \| 0.099 \| -0.099
	P \| S \| 0.331 \| 0.1 \| 0.033 \| -0.033 \| 0.033
	P \| T \| 0.331 \| 0.4 \| 0.133 \| -0.133 \| 0.133
	S \| R \| 0.335 \| 0.3 \| 0.1 \| -0.1 \| 0.1
	S \| P \| 0.335 \| 0.2 \| 0.067 \| 0.067 \| -0.067
	S \| T \| 0.335 \| 0.4 \| 0.134 \| 0.134 \| -0.134

	Player1: 0.13 \| Player2 : -0.13

	ROCK VS EQUIL2

	Player 1 \| Player 2 \| Prob 1 \| Prob 2 \| Joint Prob \| Player 1 Util \| Player 2 Util
	-------- \| -------- \| ------ \| ------ \| ---------- \| ------------- \| -------------
	R \| P \| 1 \| 0.335 \| 0.335 \| -0.335 \| 0.335
	R \| S \| 1 \| 0.333 \| 0.333 \| 0.333 \| -0.333

	Player1: 0.0 \| Player2 : 0.0
	open System

	let random = new Random()

	module Array =
	let indexed a = Array.mapi (fun i x -> (i,x)) a
	let selectColumn c a =
	Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a

	module String =
	let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " "
	let inline toupper (str:string) = str.ToUpper()


	let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s)

	let buildTableRow (collens:_[]) (row:string[]) =
	row \|> Array.mapi (fun i s -> String.pad collens.[i] s) \|> joinToStringWith " \| "


	let makeTable newline headers title (table:string[][]) =
	let hlen = Array.map String.length headers

	let lens = table \|> Array.map (Array.map (String.length))

	let longest = [\|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens \|> Array.map Seq.head \|> Array.max)\|]

	let t0 = table \|> Array.map (buildTableRow longest) \|> joinToStringWith newline

	let hrow = [\|headers; [\|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"\|]\|]
	\|> Array.map (buildTableRow longest)
	\|> joinToStringWith newline
	String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0)


	let cdf p =
	p \|> Array.fold (fun (total, list) v ->
	let cd = v + total
	cd, cd::list) (0., [])
	\|> snd
	\|> Array.ofList

	let getDiscreteSampleFromCDF (pcdf:float[]) =
	let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1

	let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx

	abs(cummProb pcdlen - pcdlen)

	let discreteSample p = cdf p \|> getDiscreteSampleFromCDF

	let inline pairop op (x,y) (u,v) = (op x u, op y v)

	let round (n:int) (x:float) = Math.Round(x,n)

	let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a

	let getStrategy (strategySum:_[]) regretSum =
	let strategy = Array.map (max 0.) regretSum
	let nSum, numactions = Array.sum strategy, strategy.Length

	let strategy' =
	if nSum <= 0. then
	Array.create numactions (1./float numactions)
	else Array.map (fun x -> x/nSum) strategy

	for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a]
	strategy'


	let getAvgStrategy strategySum =
	let nSum, numactions = Array.sum strategySum, strategySum.Length
	if nSum <= 0. then
	Array.create numactions (1./float numactions)
	else Array.map (fun x -> x/nSum) strategySum


	let getAction (actions:_[]) strategy = actions.[discreteSample strategy]


	let train1 iters f actions actionDistr regretSum strategySum =
	for _ in 0..iters - 1 do
	let strategy = getStrategy strategySum regretSum

	let otheraction = getAction actions actionDistr
	let heroaction = getAction actions strategy

	let heroutil,_ = f (heroaction,otheraction)

	Array.iteri (fun a action ->
	let altutil, _ = f (action,otheraction)
	regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions


	let train iters f actions regretSum regretSum2 strategySum strategySum2 =
	for _ in 0..iters - 1 do
	let strategy = getStrategy strategySum regretSum
	let strategy2 = getStrategy strategySum2 regretSum2

	let otheraction = getAction actions strategy2
	let heroaction = getAction actions strategy

	let heroutil,otherutil = f (heroaction,otheraction)

	Array.iteri (fun a action ->
	let altutil , _ = f (action,otheraction)
	let altutil2, _ = f (action,heroaction)

	regretSum.[a] <- regretSum.[a] + (altutil - heroutil)
	regretSum2.[a] <- regretSum2.[a] + (altutil2 - otherutil)) actions


	let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 =
	let buildTable player =
	[\|for (move,p) in strat1 do
	for (move2,p2) in strat2 ->
	[\|string move; string move2; string (round roundTableTo p) ;
	string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""\|] ,
	(player (winner (move,move2)) * (p * p2) ,
	player (winner (move2,move)) * (p * p2))\|]

	let table = buildTable fst
	let tabledisp = Array.filter (fun (_,(p,_)) -> keepZeroes \|\| round roundProbTo p <> 0.) table
	\|> Array.map (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u)
	row.[row.Length - 1] <- string (round roundTableTo u2); row)

	let astable = makeTable "\n" [\|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"\|] title tabledisp

	printfn "\n%s\n\nPlayer1: %A \| Player2 : %A"
	astable (Array.sumBy (snd >> fst) (table) \|> round roundProbTo)
	(Array.sumBy (snd >> snd) (table) \|> round roundProbTo)


	let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2


	let inline printStrategy2 title n strat =
	let rows = Array.map (fun (move,p) -> [\|string move; string(round n p)\|]) strat
	let astable = makeTable "\n" [\|"Move";"Move Probability"\|] title rows

	printfn "\n%s" astable


	let inline printStrategy title strat = printStrategy2 title 3 strat


	let simulatePlay dodisp rounds bet winner strat1 strat2 =
	let results =
	[\|for _ in 0..rounds - 1 ->
	let move1 = strat1 \|> Array.unzip \|\|> getAction
	let move2 = strat2 \|> Array.unzip \|\|> getAction

	let res1, res2 = winner (move1, move2)
	res1 * bet, res2 * bet\|]

	let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results
	if dodisp then printfn "Player1 wins: %A \| Player2 wins: %A" p1win p2win
	p1win,p2win


	//////////////////////////////

	let reset (a:_[]) = for i in 0..a.Length - 1 do a.[i] <- 0.

	type Moves2 = R \| P \| S \| T

	//The numbers control the utilities for the game. Ensure left and right sum to 0.
	let winner2 =
	function
	\| (R,R) -> ( 0.,0. )
	\| (P,R) -> ( 1.,-1.)
	\| (S,R) -> (-1.,1. )

	\| (T,R) -> (1.,-1. )

	\| (R,P) -> (-1.,1. )
	\| (P,P) -> ( 0.,0. )
	\| (S,P) -> ( 1.,-1.)

	\| (T,P) -> ( 1.,-1.)

	\| (R,S) -> ( 1.,-1.)
	\| (P,S) -> (-1.,1. )
	\| (S,S) -> ( 0.,0. )

	\| (T,S) -> ( -1.,1.)

	\| (R,T) -> (-1.,1. )
	\| (P,T) -> (-1.,1. )
	\| (S,T) -> (1.,-1. )
	\| (T,T) -> (0.,0. )

	//The numbers control the utilities for the game. Ensure left and right sum to 0.
	let winner3 =
	function
	\| (R,R) -> ( 0.,0. )
	\| (P,R) -> ( 1.,-1.)
	\| (S,R) -> (-1.,1. )
	//This essentially flips a coin but can change the number to <= 1.
	\| (T,R) -> if random.NextDouble() < 0.5 then (1.,-1. ) else (-1.,1. )

	\| (R,P) -> (-1.,1. )
	\| (P,P) -> ( 0.,0. )
	\| (S,P) -> ( 1.,-1.)
	//roll a die with sides 4 or less as wins.
	\| (T,P) -> if random.NextDouble() < 4./6. then (1.,-1. ) else (-1.,1. )

	\| (R,S) -> ( 1.,-1.)
	\| (P,S) -> (-1.,1. )
	\| (S,S) -> ( 0.,0. )

	\| (T,S) -> (-1.,1.)

	\| (R,T) -> if random.NextDouble() < 0.5 then (-1.,1. ) else (1.,-1. )
	\| (P,T) -> if random.NextDouble() < 4./6. then (-1.,1. ) else (1.,-1. )
	\| (S,T) -> (1.,-1.)
	\| (T,T) -> (0.,0. )
	//---------------------------

	let regretSumX = [\|0.;0.;0.;0.\|]
	let strategySumX = [\|0.;0.;0.;0.\|]

	let regretSum2X = [\|0.;0.;0.;0.\|]
	let strategySum2X = [\|0.;0.;0.;0.\|]

	let rpstStratDef = Array.zip [\|R;P;S;T\|] [\|0.4;0.4;0.1;0.1\|] //<== These numbers can be changed. Ensure sums to 1.
	let rpstStratDef2 = Array.zip [\|R;P;S;T\|] [\|0.3;0.2;0.1;0.4\|] //<== These numbers can be changed. Ensure sums to 1.
	let purePaper = Array.zip [\|R;P;S;T\|] [\|0.;1.;0.;0.\|]
	let pureRock = Array.zip [\|R;P;S;T\|] [\|1.;0.;0.;0.\|]
	let pureTelephone = Array.zip [\|R;P;S;T\|] [\|0.;0.;0.;1.\|]

	let winnerFunction = winner3 //<== Change the name of this function to either winner2 or winner3 to control what is learnt

	train1 200_000 winnerFunction [\|R;P;S;T\|] (Array.map snd rpstStratDef) regretSumX strategySumX

	let rpstStrat1 = Array.zip [\|R;P;S;T\|] (getAvgStrategy strategySumX)
	printStrategy "Learned Strategy" rpstStrat1

	displayTable "Learned Strategy vs Fixed Adversary" winnerFunction rpstStrat1 rpstStratDef //note that displayTable probabilities and expected values aren't quite correct for winner3

	let avg = [\|for _ in 0..99 -> simulatePlay false 10 5. winnerFunction rpstStrat1 rpstStratDef\|] \|> Array.averageBy fst
	printfn "\n~Strategy EV (10 rounds, $5/round)\n[📰,100%%] vs [👊,40%% ;📰, 40%%; ✂, 10%%; 📠,10%%] = %A" avg


	//---------------------------

	reset regretSumX; reset strategySumX
	train 200_000 winnerFunction [\|R;P;S;T\|] regretSumX regretSum2X strategySumX strategySum2X

	let rpstStrat1b, rpstStrat2 =
	Array.zip [\|R;P;S;T\|] (getAvgStrategy strategySumX) ,
	Array.zip [\|R;P;S;T\|] (getAvgStrategy strategySum2X)

	printStrategy "Equil1" rpstStrat1b
	printStrategy "Equil2" rpstStrat2

	displayTable "Equil" winnerFunction rpstStrat1b rpstStrat2

	displayTable "equil1 vs adv" winnerFunction rpstStrat1b rpstStratDef2
	displayTable "rock vs equil2" winnerFunction pureRock rpstStrat2
Player 1	Player 2	Prob 1	Prob 2	Joint Prob	Player 1 Util	Player 2 Util
P	R	0.999	0.4	0.399	0.399	-0.399
P	S	0.999	0.1	0.1	-0.1	0.1
P	T	0.999	0.1	0.1	-0.1	0.1
Player 1	Player 2	Prob 1	Prob 2	Joint Prob	Player 1 Util	Player 2 Util
R	P	0.332	0.335	0.111	-0.111	0.111
R	S	0.332	0.333	0.11	0.11	-0.11
P	R	0.333	0.333	0.111	0.111	-0.111
P	S	0.333	0.333	0.111	-0.111	0.111
S	R	0.335	0.333	0.111	-0.111	0.111
S	P	0.335	0.335	0.112	0.112	-0.112
Player 1	Player 2	Prob 1	Prob 2	Joint Prob	Player 1 Util	Player 2 Util
R	P	0.334	0.2	0.067	-0.067	0.067
R	S	0.334	0.1	0.033	0.033	-0.033
R	T	0.334	0.4	0.134	0.134	-0.134
P	R	0.331	0.3	0.099	0.099	-0.099
P	S	0.331	0.1	0.033	-0.033	0.033
P	T	0.331	0.4	0.133	-0.133	0.133
S	R	0.335	0.3	0.1	-0.1	0.1
S	P	0.335	0.2	0.067	0.067	-0.067
S	T	0.335	0.4	0.134	0.134	-0.134