sir-deenicus · September 13, 2017 23:21
diff --git a/cfr.fsx b/cfr.fsx
 open Prelude.Common
 open System
 open Prelude.Math
 open Prelude
 open SimpleTrees

 type Node = {    
    regretSum :  double [] ; 
    strategySum : double [];
 }


 let getStrategy weight (strategySum:_[]) regretSum =
  let strategy         = Array.map (max 0.)  regretSum 
  let nSum, numactions = Array.sum strategy, strategy.Length
  
  let strategy' = 
      if nSum <= 0. then 
           Array.create numactions (1./float numactions) 
      else Array.map (fun x -> x/nSum) strategy
  
  for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + weight * strategy'.[a]
  strategy'


 let getAvgStrategy strategySum =  
  let nSum, numactions = Array.sum strategySum, strategySum.Length           
  if  nSum <= 0. then 
       Array.create numactions (1./float numactions) 
  else Array.map (fun x -> x/nSum) strategySum
   

 let newNode n = { regretSum = Array.create n 0.; strategySum = Array.create n 0.}
                                
                                
 let rec cfr d (nodeMap:Dict<_,_>) reward (actions:_[]) (history:string) p0 p1 =
      let player = history.Length % 2
      let nummoves = actions.Length

      let node = 
          match nodeMap.tryFind history with
           | None   -> let n = newNode nummoves in nodeMap.Add(history,n) ; n
           | Some n -> n             
      
      match reward history with
      | Some r -> r
      | None ->
          let strategy = getStrategy (if player = 0 then p0 else p1) node.strategySum node.regretSum
          let util = Array.create nummoves 0.
          let mutable nodeUtil = 0.
         
          for a in 0..nummoves - 1 do 
              let h' = history + actions.[a]
              util.[a] <- if player = 0 then  
                               cfr (d+1) nodeMap reward actions h' (p0 * strategy.[a]) p1
                          else cfr (d+1) nodeMap reward actions h' p0 (p1 * strategy.[a])

              nodeUtil <- nodeUtil + strategy.[a] * util.[a]

          for a in 0..nummoves - 1 do 
              let regret = util.[a] - nodeUtil;
              node.regretSum.[a] <- node.regretSum.[a] + (if player = 0 then p1 else p0) * regret
     
          nodeUtil
	open Prelude.Common
	open System
	open Prelude.Math
	open Prelude
	open SimpleTrees

	type Node = {
	regretSum : double [] ;
	strategySum : double [];
	}


	let getStrategy weight (strategySum:_[]) regretSum =
	let strategy = Array.map (max 0.) regretSum
	let nSum, numactions = Array.sum strategy, strategy.Length

	let strategy' =
	if nSum <= 0. then
	Array.create numactions (1./float numactions)
	else Array.map (fun x -> x/nSum) strategy

	for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + weight * strategy'.[a]
	strategy'


	let getAvgStrategy strategySum =
	let nSum, numactions = Array.sum strategySum, strategySum.Length
	if nSum <= 0. then
	Array.create numactions (1./float numactions)
	else Array.map (fun x -> x/nSum) strategySum


	let newNode n = { regretSum = Array.create n 0.; strategySum = Array.create n 0.}


	let rec cfr d (nodeMap:Dict<_,_>) reward (actions:_[]) (history:string) p0 p1 =
	let player = history.Length % 2
	let nummoves = actions.Length

	let node =
	match nodeMap.tryFind history with
	\| None -> let n = newNode nummoves in nodeMap.Add(history,n) ; n
	\| Some n -> n

	match reward history with
	\| Some r -> r
	\| None ->
	let strategy = getStrategy (if player = 0 then p0 else p1) node.strategySum node.regretSum
	let util = Array.create nummoves 0.
	let mutable nodeUtil = 0.

	for a in 0..nummoves - 1 do
	let h' = history + actions.[a]
	util.[a] <- if player = 0 then
	cfr (d+1) nodeMap reward actions h' (p0 * strategy.[a]) p1
	else cfr (d+1) nodeMap reward actions h' p0 (p1 * strategy.[a])

	nodeUtil <- nodeUtil + strategy.[a] * util.[a]

	for a in 0..nummoves - 1 do
	let regret = util.[a] - nodeUtil;
	node.regretSum.[a] <- node.regretSum.[a] + (if player = 0 then p1 else p0) * regret

	nodeUtil