dat-boris · October 10, 2021 03:59
diff --git a/petting_zoo_tutorial.py b/petting_zoo_tutorial.py
 """PettingZoo

 Tutorial from: https://nbviewer.org/github/gsverhoeven/gt_rl_course/blob/master/week_8/marl_tictactoe.ipynb

 Setup:

    pipenv install pettingzoo[classic]
 """

 import random
 import numpy as np
 from collections import defaultdict

 from pettingzoo.classic import tictactoe_v3


 def policy(observation, agent):
    """Choose a random action among those the action mask allows.

    Args:
        observation: Mapping whose ``'action_mask'`` entry is array-like;
            the indices of its nonzero entries are the candidate actions
            (presumably the legal moves -- confirm against the environment).
        agent: Name of the acting agent. Not used by this policy.

    Returns:
        One index drawn with ``random.choice`` from the nonzero positions
        of the mask.
    """
    candidate_actions = np.flatnonzero(observation['action_mask'])
    return random.choice(candidate_actions)


 # Create the tic-tac-toe environment and reset it before playing.
 env = tictactoe_v3.env()
 env.reset()

 # Play through the agents yielded by env.agent_iter(): read the latest
 # state for the current agent, choose an action, step, then render.
 for i, agent in enumerate(env.agent_iter()):
    observation, reward, done, info = env.last()
    if done:
        # Once the agent is done, step with None instead of a real action.
        action = None
    else:
        action = policy(observation, agent)
    env.step(action)
    print("Round {}...".format(i))
    env.render()  # this visualizes a single game

 # TODO: add the Q-learning part of the tutorial
	"""PettingZoo

	Tutorial from: https://nbviewer.org/github/gsverhoeven/gt_rl_course/blob/master/week_8/marl_tictactoe.ipynb

	Setup:

	pipenv install pettingzoo[classic]
	"""

	import random
	import numpy as np
	from collections import defaultdict

	from pettingzoo.classic import tictactoe_v3


	def policy(observation, agent):
		"""Choose a random action among those the action mask allows.

		Args:
			observation: Mapping whose ``'action_mask'`` entry is array-like;
				the indices of its nonzero entries are the candidate actions
				(presumably the legal moves -- confirm against the environment).
			agent: Name of the acting agent. Not used by this policy.

		Returns:
			One index drawn with ``random.choice`` from the nonzero positions
			of the mask.
		"""
		action = random.choice(np.flatnonzero(observation['action_mask']))
		return action


	# Create the tic-tac-toe environment and reset it before playing.
	env = tictactoe_v3.env()

	env.reset()
	# Play through the agents yielded by env.agent_iter(): read the latest
	# state for the current agent, choose an action, step, then render.
	for i, agent in enumerate(env.agent_iter()):
		observation, reward, done, info = env.last()
		# Once the agent is done, step with None instead of a real action.
		action = policy(observation, agent) if not done else None
		env.step(action)
		print("Round {}...".format(i))
		env.render()  # this visualizes a single game

	# TODO: add the Q-learning part of the tutorial
No results found