Skip to content

Instantly share code, notes, and snippets.

View suragnair's full-sized avatar

Surag Nair suragnair

View GitHub Profile
def policyIterSP(game):
    """Run AlphaZero-style policy iteration through self-play.

    Outer loop: generate self-play training examples with the current
    network, train a candidate network on everything collected so far,
    then keep the candidate only if it beats the incumbent often enough
    in head-to-head play.

    Args:
        game: the game environment, passed through to executeEpisode
            and pit (exact interface defined elsewhere in this file).

    Returns:
        The best neural network found after numIters iterations.

    NOTE(review): relies on module-level names defined elsewhere:
    initNNet, executeEpisode, trainNNet, pit, numIters, numEps,
    threshold — confirm they are in scope where this is used.
    """
    nnet = initNNet()  # initialise random neural network
    examples = []
    for i in range(numIters):
        for e in range(numEps):
            # Examples accumulate across iterations: training always
            # sees the full history of self-play games, not just the
            # latest batch.
            examples += executeEpisode(game, nnet)  # collect examples from this game
        new_nnet = trainNNet(examples)
        frac_win = pit(new_nnet, nnet)  # compare new net with previous net
        if frac_win > threshold:
            nnet = new_nnet  # replace with new net
    # Fix: the pasted snippet fell off the end returning None; return
    # the incumbent (best-so-far) network so callers can use it.
    return nnet
@suragnair
suragnair / mcts.py
Last active November 29, 2023 16:15
MCTS for Alpha Zero
def search(s, game, nnet):
if game.gameEnded(s): return -game.gameReward(s)
if s not in visited:
visited.add(s)
P[s], v = nnet.predict(s)
return -v
max_u, best_a = -float("inf"), -1
for a in game.getValidActions(s):