Skip to content

Instantly share code, notes, and snippets.

@jonaslsaa
Created February 22, 2025 21:11
Show Gist options
  • Save jonaslsaa/2a845d922d7f13e29bd813bfe4aee872 to your computer and use it in GitHub Desktop.
Save jonaslsaa/2a845d922d7f13e29bd813bfe4aee872 to your computer and use it in GitHub Desktop.
Tic-Tac-Toe tournament with two MCTS bots, OpenMP parallelization, and performance stats.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <omp.h> // Include OpenMP header
/**********************************************************
* DATA STRUCTURES
**********************************************************/
typedef struct {
unsigned short X; // Bitboard for player X
unsigned short O; // Bitboard for player O
char current_player; // 'X' or 'O'
} TicTacToeGame;
typedef struct {
int num_simulations;
double exploration_constant;
double reward_win;
double reward_loss;
double reward_draw;
} BotConfig;
typedef struct MCTSNode {
TicTacToeGame state;
struct MCTSNode* parent;
struct MCTSNode* children[9];
int num_children;
int untried_actions[9];
int num_untried_actions;
double wins;
int visits;
char player;
int action_from_parent;
} MCTSNode;
/**********************************************************
* FUNCTION DECLARATIONS
**********************************************************/
TicTacToeGame make_empty_game(void);
TicTacToeGame apply_action(const TicTacToeGame* game, int action);
int get_available_actions(const TicTacToeGame* game, int* actions_out);
int is_terminal(const TicTacToeGame* game);
char check_winner(const TicTacToeGame* game);
MCTSNode* create_node(const TicTacToeGame* state, MCTSNode* parent, char player, int action_from_parent);
void free_node(MCTSNode* node);
int mcts_get_action(const TicTacToeGame* game, const BotConfig* config, int* total_simulations);
static MCTSNode* mcts_select(MCTSNode* node, double c);
static MCTSNode* mcts_expand(MCTSNode* node);
static double mcts_simulate(MCTSNode* node, const BotConfig* config);
static void mcts_backpropagate(MCTSNode* node, double result, char rollout_player, const BotConfig* config);
static MCTSNode* mcts_best_child(MCTSNode* node, double c);
typedef struct {
int winsA;
int winsB;
int draws;
int total_simulations;
} TournamentResult;
int num_of_threads = 1;
TournamentResult run_tournament(int num_games,
const BotConfig* agentA_config,
const BotConfig* agentB_config);
/**********************************************************
* MAIN
**********************************************************/
int main(void)
{
srand((unsigned int)time(NULL));
BotConfig agentA_config = {800, 0.5, 1.25, -0.5, 0.45};
BotConfig agentB_config = {1000, sqrt(2.0), 1.0, 0.0, 0.5};
int num_games = 10000;
clock_t start_time = clock();
TournamentResult result = run_tournament(num_games, &agentA_config, &agentB_config);
clock_t end_time = clock();
printf("Num of threads: %d\n", num_of_threads);
double elapsed_time = ((double)(end_time - start_time) / CLOCKS_PER_SEC) / num_of_threads;
printf("Results after %d games:\n", num_games);
printf("Agent A: %d wins\n", result.winsA);
printf("Agent B: %d wins\n", result.winsB);
printf("Draws: %d\n", result.draws);
double total = (double)num_games;
printf("\nPercentages:\n");
printf("Agent A win rate: %.2f%%\n", (result.winsA / total) * 100.0);
printf("Agent B win rate: %.2f%%\n", (result.winsB / total) * 100.0);
printf("Draw rate: %.2f%%\n", (result.draws / total) * 100.0);
printf("\nTotal simulations: %d\n", result.total_simulations);
printf("Total time: %.2f seconds\n", elapsed_time);
printf("Games per second: %.0f\n", (double)num_games / elapsed_time);
printf("Evaluated simulations per second: %.0f\n", (double)result.total_simulations / elapsed_time);
return 0;
}
/**********************************************************
* TIC-TAC-TOE GAME IMPLEMENTATION WITH BITBOARD
**********************************************************/
// Precomputed winning bitmasks
const unsigned short WINNING_MASKS[8] = {
0b000000111, // Rows
0b000111000,
0b111000000,
0b001001001, // Columns
0b010010010,
0b100100100,
0b100010001, // Diagonals
0b001010100
};
// Initialize an empty game
TicTacToeGame make_empty_game(void) {
TicTacToeGame game;
game.X = 0;
game.O = 0;
game.current_player = 'X';
return game;
}
// Apply an action to the game state and return the new state
TicTacToeGame apply_action(const TicTacToeGame* game, int action)
{
TicTacToeGame new_game = *game;
unsigned short move_bit = 1 << action;
if (game->current_player == 'X') {
new_game.X |= move_bit;
} else {
new_game.O |= move_bit;
}
new_game.current_player = (game->current_player == 'X') ? 'O' : 'X';
return new_game;
}
// Get available actions based on current bitboards
int get_available_actions(const TicTacToeGame* game, int* actions_out)
{
unsigned short occupied = game->X | game->O;
int count = 0;
for(int i = 0; i < 9; i++) {
if (!(occupied & (1 << i))) {
actions_out[count++] = i;
}
}
return count;
}
// Check if the game has reached a terminal state
int is_terminal(const TicTacToeGame* game)
{
if (check_winner(game) != 0) {
return 1;
}
// If all positions are occupied
if ((game->X | game->O) == 0b111111111) {
return 1;
}
return 0;
}
// Check for a winner using bitwise operations
char check_winner(const TicTacToeGame* game)
{
for(int w = 0; w < 8; w++){
if ((game->X & WINNING_MASKS[w]) == WINNING_MASKS[w]){
return 'X';
}
if ((game->O & WINNING_MASKS[w]) == WINNING_MASKS[w]){
return 'O';
}
}
return 0;
}
/**********************************************************
* MCTS NODE HELPERS
**********************************************************/
// Create a new MCTS node
MCTSNode* create_node(const TicTacToeGame* state, MCTSNode* parent, char player, int action_from_parent)
{
MCTSNode* node = (MCTSNode*)calloc(1, sizeof(MCTSNode));
node->state = *state;
node->parent = parent;
node->num_children = 0;
node->wins = 0.0;
node->visits = 0;
node->player = player;
node->action_from_parent = action_from_parent;
node->num_untried_actions = get_available_actions(state, node->untried_actions);
return node;
}
// Recursively free a node and its children
void free_node(MCTSNode* node)
{
if (!node) return;
for(int i = 0; i < node->num_children; i++){
free_node(node->children[i]);
}
free(node);
}
/**********************************************************
* MCTS BOT LOGIC
**********************************************************/
// Get the best action using MCTS
int mcts_get_action(const TicTacToeGame* game, const BotConfig* config, int* total_simulations)
{
int available[9];
int count = get_available_actions(game, available);
if (count == 0) {
return -1;
}
MCTSNode* root = create_node(game, NULL, game->current_player, -1);
for(int i = 0; i < config->num_simulations; i++){
(*total_simulations)++;
MCTSNode* leaf = mcts_select(root, config->exploration_constant);
if (!is_terminal(&leaf->state)){
leaf = mcts_expand(leaf);
}
double result = mcts_simulate(leaf, config);
mcts_backpropagate(leaf, result, leaf->player, config);
}
// Choose the child with the highest visit count
MCTSNode* best = NULL;
int best_visits = -1;
for(int i = 0; i < root->num_children; i++){
if (root->children[i]->visits > best_visits){
best_visits = root->children[i]->visits;
best = root->children[i];
}
}
int best_action = best ? best->action_from_parent : -1;
free_node(root);
return best_action;
}
/**********************************************************
* MCTS INTERNAL ROUTINES
**********************************************************/
// Select a node to explore using UCT
static MCTSNode* mcts_select(MCTSNode* node, double c)
{
MCTSNode* current = node;
while(!is_terminal(&current->state) && current->num_untried_actions == 0) {
current = mcts_best_child(current, c);
}
return current;
}
// Expand a node by adding a new child for an untried action
static MCTSNode* mcts_expand(MCTSNode* node)
{
if (node->num_untried_actions == 0) {
return node;
}
int idx = node->num_untried_actions - 1;
int action = node->untried_actions[idx];
node->num_untried_actions--;
TicTacToeGame new_state = apply_action(&node->state, action);
char child_player = new_state.current_player;
MCTSNode* child = create_node(&new_state, node, child_player, action);
node->children[node->num_children++] = child;
return child;
}
// Simulate a random playout from the given node
static double mcts_simulate(MCTSNode* node, const BotConfig* config)
{
TicTacToeGame sim_state = node->state;
char rollout_player = node->player;
while(!is_terminal(&sim_state)){
int moves[9];
int c2 = get_available_actions(&sim_state, moves);
if (c2 == 0) break; // No available moves
int action = moves[rand() % c2];
sim_state = apply_action(&sim_state, action);
}
char w = check_winner(&sim_state);
if (w == 0){
return config->reward_draw;
}
else if (w == rollout_player){
return config->reward_win;
} else {
return config->reward_loss;
}
}
// Backpropagate the simulation result up the tree
static void mcts_backpropagate(MCTSNode* node, double result, char rollout_player, const BotConfig* config)
{
MCTSNode* current = node;
while(current != NULL){
current->visits += 1;
if (current->player == rollout_player) {
current->wins += (1.0 - result);
} else {
current->wins += result;
}
current = current->parent;
}
}
// Select the best child based on UCT value
static MCTSNode* mcts_best_child(MCTSNode* node, double c)
{
MCTSNode* best = NULL;
double best_uct = -1e30;
for(int i = 0; i < node->num_children; i++){
MCTSNode* child = node->children[i];
if (child->visits == 0) {
return child;
}
double exploitation = child->wins / (double)child->visits;
double exploration = sqrt(log((double)node->visits) / (double)child->visits);
double uct = exploitation + c * exploration;
if (uct > best_uct){
best_uct = uct;
best = child;
}
}
return best;
}
/**********************************************************
* TOURNAMENT LOGIC WITH OPENMP OPTIMIZATION
**********************************************************/
TournamentResult run_tournament(int num_games,
const BotConfig* agentA_config,
const BotConfig* agentB_config)
{
TournamentResult tr = {0, 0, 0, 0};
// Initialize OpenMP
#pragma omp parallel
{
if (omp_get_thread_num() > num_of_threads) {
num_of_threads = omp_get_thread_num();
}
// Each thread gets its own seed for rand_r
unsigned int seed = (unsigned int)time(NULL) ^ omp_get_thread_num();
// Local accumulators for each thread
int local_winsA = 0;
int local_winsB = 0;
int local_draws = 0;
int local_total_simulations = 0;
// Parallelize the for loop
#pragma omp for schedule(static)
for(int g = 0; g < num_games; g++){
int flip = rand_r(&seed) % 2;
BotConfig* botX;
BotConfig* botO;
char nameX;
char nameO;
if (flip == 0) {
botX = (BotConfig*)agentA_config;
botO = (BotConfig*)agentB_config;
nameX = 'A';
nameO = 'B';
} else {
botX = (BotConfig*)agentB_config;
botO = (BotConfig*)agentA_config;
nameX = 'B';
nameO = 'A';
}
TicTacToeGame game = make_empty_game();
while(!is_terminal(&game)){
if (game.current_player == 'X'){
int action = mcts_get_action(&game, botX, &local_total_simulations);
if (action < 0) break;
game = apply_action(&game, action);
} else {
int action = mcts_get_action(&game, botO, &local_total_simulations);
if (action < 0) break;
game = apply_action(&game, action);
}
}
char w = check_winner(&game);
if (w == 'X'){
if (nameX == 'A') local_winsA++; else local_winsB++;
} else if (w == 'O'){
if (nameO == 'A') local_winsA++; else local_winsB++;
} else {
local_draws++;
}
}
// Accumulate the local results into the global result
#pragma omp atomic
tr.winsA += local_winsA;
#pragma omp atomic
tr.winsB += local_winsB;
#pragma omp atomic
tr.draws += local_draws;
#pragma omp atomic
tr.total_simulations += local_total_simulations;
}
return tr;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment