carlos-aguayo · November 6, 2020 01:47
diff --git a/MCTS.py b/MCTS.py
 # https://github.com/suragnair/alpha-zero-general/blob/5156c7fd1d2f3e5fefe732a4b2e0ffc5b272f819/MCTS.py#L105-L121
 # Selection step: score every valid action index for state key `s`, keep the
 # highest-scoring one, apply it to the board and recurse into the result.
 # cur_best starts at -inf so the first finite score always replaces it.
 # NOTE(review): if no entry of `valids` is truthy, best_act stays -1 and is
 # passed to getNextState below -- confirm callers guarantee a valid action.
 cur_best = -float('inf')
 best_act = -1

 # pick the action with the highest upper confidence bound
 for a in range(self.game.getActionSize()):
    if valids[a]:
        if (s, a) in self.Qsa:
            # Edge already has a stored value: Q plus an exploration term
            # cpuct * P(s, a) * sqrt(N(s)) / (1 + N(s, a)).
            u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
                    1 + self.Nsa[(s, a)])
        else:
            # No stored value for this edge: the score is the exploration
            # term alone, with no division by a visit count. EPS is defined
            # outside this fragment; presumably a small constant that keeps
            # the term nonzero when Ns[s] is 0 -- TODO confirm.
            u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?

        # Strict '>' means ties keep the lowest action index seen so far.
        if u > cur_best:
            cur_best = u
            best_act = a

 # From here on `a` is the chosen action, not the loop index.
 a = best_act
 # The move is always applied as player 1; presumably canonicalBoard is
 # already expressed from player 1's point of view -- verify against caller.
 next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
 # Re-express the resulting board for whichever player moves next.
 next_s = self.game.getCanonicalForm(next_s, next_player)

 # Recursively visit the node
 v = self.search(next_s)
	# https://github.com/suragnair/alpha-zero-general/blob/5156c7fd1d2f3e5fefe732a4b2e0ffc5b272f819/MCTS.py#L105-L121
	# Selection step: score every valid action index for state key `s`, keep
	# the highest-scoring one, apply it to the board and recurse into the result.
	# cur_best starts at -inf so the first finite score always replaces it;
	# best_act stays -1 if no entry of `valids` is truthy.
	cur_best = -float('inf')
	best_act = -1

	# pick the action with the highest upper confidence bound
	for a in range(self.game.getActionSize()):
		if valids[a]:
			if (s, a) in self.Qsa:
				# Edge already has a stored value: Q plus an exploration term
				# cpuct * P(s, a) * sqrt(N(s)) / (1 + N(s, a)).
				u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
					1 + self.Nsa[(s, a)])
			else:
				# No stored value for this edge: exploration term alone,
				# with no division by a visit count.
				u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?

			# Strict '>' means ties keep the lowest action index seen so far.
			if u > cur_best:
				cur_best = u
				best_act = a

	# From here on `a` is the chosen action, not the loop index.
	a = best_act
	next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
	# Re-express the resulting board for whichever player moves next.
	next_s = self.game.getCanonicalForm(next_s, next_player)

	# Recursively visit the node
	v = self.search(next_s)