thomasahle · January 7, 2022 16:01
diff --git a/train.py b/train.py
 def play(r1, r2, replay_buffer):
    """Play one sampled game and log the outcome of every visited state.

    Starting from ``game.make_state()``, actions are sampled until the most
    recent call is ``game.LIE_ACTION``.  The challenged call is then scored
    with ``game.evaluate_call`` and the resulting +1/-1 is carried back
    through the visited states, changing sign at every step.

    Args:
        r1: Forwarded to ``game.make_priv(r1, 0)`` and
            ``game.evaluate_call``; presumably player 0's roll -- confirm
            against the ``game`` object.
        r2: Same as ``r1`` but for player 1.
        replay_buffer: Receives, via ``append``, two
            ``(priv, state, result)`` tuples per visited state (one per
            player), starting with the final state and ending with the
            initial one.

    Returns:
        None.  The replay buffer is filled as a side effect.
    """
    seats = (game.make_priv(r1, 0), game.make_priv(r2, 1))

    # Forward pass: sample moves until somebody has called "lie", keeping
    # track of who was to move in each state along the way.
    visited = []
    position = game.make_state()
    while True:
        mover = game.get_cur(position)       # Id of the player to move
        history = game.get_calls(position)   # Calls made so far
        visited.append((mover, position))
        if history and history[-1] == game.LIE_ACTION:
            break
        # Sampling follows the network's values; args.eps is added to the
        # regrets to encourage exploration.
        choice = game.sample_action(seats[mover], position, args.eps)
        position = game.apply_action(position, choice)

    # The player to move in the final state made the challenged call, so a
    # call that holds up is a win for that player.
    challenged = history[-2] if len(history) >= 2 else -1
    outcome = 1 if game.evaluate_call(r1, r2, challenged) else -1

    # Backward pass (negamax): record the result from both players' points
    # of view, flipping the sign each time we step back one move.
    for mover, position in reversed(visited):
        replay_buffer.append((seats[mover], position, outcome))
        replay_buffer.append((seats[1 - mover], position, -outcome))
        outcome = -outcome
	def play(r1, r2, replay_buffer):
	    """Play a single sampled game and store the result of each state.

	    The game is explored recursively: every non-terminal state samples
	    one action and takes the negated result of the successor state
	    (negamax).  A state is terminal once the latest call is
	    ``game.LIE_ACTION``.

	    Args:
	        r1: Forwarded to ``game.make_priv(r1, 0)`` and
	            ``game.evaluate_call``; presumably player 0's roll -- confirm
	            against the ``game`` object.
	        r2: Same as ``r1`` but for player 1.
	        replay_buffer: Receives, via ``append``, two
	            ``(priv, state, result)`` tuples for every visited state, one
	            from each player's perspective.

	    Returns:
	        None.  The replay buffer is filled as a side effect.
	    """
	    privs = [game.make_priv(r1, 0), game.make_priv(r2, 1)]

	    def play_inner(state):
	        """Return +1/-1 for ``state`` as seen by the player to move."""
	        cur = game.get_cur(state)      # Current player id
	        calls = game.get_calls(state)  # Calls made so far

	        if calls and calls[-1] == game.LIE_ACTION:
	            # With fewer than two calls there is nothing to challenge;
	            # -1 is passed to evaluate_call in that case.
	            prev_call = calls[-2] if len(calls) >= 2 else -1
	            # If prev_call holds, we won (our opponent called "lie").
	            res = 1 if game.evaluate_call(r1, r2, prev_call) else -1
	        else:
	            # Sample a random action based on values from the network.
	            # args.eps is added to the regrets to promote exploration.
	            action = game.sample_action(privs[cur], state, args.eps)
	            new_state = game.apply_action(state, action)
	            # Negamax: our result is the negation of the opponent's.
	            res = -play_inner(new_state)

	        # Save the result from the perspective of both sides.
	        replay_buffer.append((privs[cur], state, res))
	        replay_buffer.append((privs[1 - cur], state, -res))

	        return res

	    state = game.make_state()
	    play_inner(state)