Created
February 22, 2025 21:11
-
-
Save jonaslsaa/2a845d922d7f13e29bd813bfe4aee872 to your computer and use it in GitHub Desktop.
Tic-Tac-Toe tournament with two MCTS bots, OpenMP parallelization, and performance stats.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <math.h> | |
#include <time.h> | |
#include <omp.h> // Include OpenMP header | |
/********************************************************** | |
* DATA STRUCTURES | |
**********************************************************/ | |
typedef struct { | |
unsigned short X; // Bitboard for player X | |
unsigned short O; // Bitboard for player O | |
char current_player; // 'X' or 'O' | |
} TicTacToeGame; | |
typedef struct { | |
int num_simulations; | |
double exploration_constant; | |
double reward_win; | |
double reward_loss; | |
double reward_draw; | |
} BotConfig; | |
typedef struct MCTSNode { | |
TicTacToeGame state; | |
struct MCTSNode* parent; | |
struct MCTSNode* children[9]; | |
int num_children; | |
int untried_actions[9]; | |
int num_untried_actions; | |
double wins; | |
int visits; | |
char player; | |
int action_from_parent; | |
} MCTSNode; | |
/********************************************************** | |
* FUNCTION DECLARATIONS | |
**********************************************************/ | |
TicTacToeGame make_empty_game(void); | |
TicTacToeGame apply_action(const TicTacToeGame* game, int action); | |
int get_available_actions(const TicTacToeGame* game, int* actions_out); | |
int is_terminal(const TicTacToeGame* game); | |
char check_winner(const TicTacToeGame* game); | |
MCTSNode* create_node(const TicTacToeGame* state, MCTSNode* parent, char player, int action_from_parent); | |
void free_node(MCTSNode* node); | |
int mcts_get_action(const TicTacToeGame* game, const BotConfig* config, int* total_simulations); | |
static MCTSNode* mcts_select(MCTSNode* node, double c); | |
static MCTSNode* mcts_expand(MCTSNode* node); | |
static double mcts_simulate(MCTSNode* node, const BotConfig* config); | |
static void mcts_backpropagate(MCTSNode* node, double result, char rollout_player, const BotConfig* config); | |
static MCTSNode* mcts_best_child(MCTSNode* node, double c); | |
typedef struct { | |
int winsA; | |
int winsB; | |
int draws; | |
int total_simulations; | |
} TournamentResult; | |
int num_of_threads = 1; | |
TournamentResult run_tournament(int num_games, | |
const BotConfig* agentA_config, | |
const BotConfig* agentB_config); | |
/********************************************************** | |
* MAIN | |
**********************************************************/ | |
int main(void) | |
{ | |
srand((unsigned int)time(NULL)); | |
BotConfig agentA_config = {800, 0.5, 1.25, -0.5, 0.45}; | |
BotConfig agentB_config = {1000, sqrt(2.0), 1.0, 0.0, 0.5}; | |
int num_games = 10000; | |
clock_t start_time = clock(); | |
TournamentResult result = run_tournament(num_games, &agentA_config, &agentB_config); | |
clock_t end_time = clock(); | |
printf("Num of threads: %d\n", num_of_threads); | |
double elapsed_time = ((double)(end_time - start_time) / CLOCKS_PER_SEC) / num_of_threads; | |
printf("Results after %d games:\n", num_games); | |
printf("Agent A: %d wins\n", result.winsA); | |
printf("Agent B: %d wins\n", result.winsB); | |
printf("Draws: %d\n", result.draws); | |
double total = (double)num_games; | |
printf("\nPercentages:\n"); | |
printf("Agent A win rate: %.2f%%\n", (result.winsA / total) * 100.0); | |
printf("Agent B win rate: %.2f%%\n", (result.winsB / total) * 100.0); | |
printf("Draw rate: %.2f%%\n", (result.draws / total) * 100.0); | |
printf("\nTotal simulations: %d\n", result.total_simulations); | |
printf("Total time: %.2f seconds\n", elapsed_time); | |
printf("Games per second: %.0f\n", (double)num_games / elapsed_time); | |
printf("Evaluated simulations per second: %.0f\n", (double)result.total_simulations / elapsed_time); | |
return 0; | |
} | |
/********************************************************** | |
* TIC-TAC-TOE GAME IMPLEMENTATION WITH BITBOARD | |
**********************************************************/ | |
// Precomputed winning bitmasks | |
const unsigned short WINNING_MASKS[8] = { | |
0b000000111, // Rows | |
0b000111000, | |
0b111000000, | |
0b001001001, // Columns | |
0b010010010, | |
0b100100100, | |
0b100010001, // Diagonals | |
0b001010100 | |
}; | |
// Initialize an empty game | |
TicTacToeGame make_empty_game(void) { | |
TicTacToeGame game; | |
game.X = 0; | |
game.O = 0; | |
game.current_player = 'X'; | |
return game; | |
} | |
// Apply an action to the game state and return the new state | |
TicTacToeGame apply_action(const TicTacToeGame* game, int action) | |
{ | |
TicTacToeGame new_game = *game; | |
unsigned short move_bit = 1 << action; | |
if (game->current_player == 'X') { | |
new_game.X |= move_bit; | |
} else { | |
new_game.O |= move_bit; | |
} | |
new_game.current_player = (game->current_player == 'X') ? 'O' : 'X'; | |
return new_game; | |
} | |
// Get available actions based on current bitboards | |
int get_available_actions(const TicTacToeGame* game, int* actions_out) | |
{ | |
unsigned short occupied = game->X | game->O; | |
int count = 0; | |
for(int i = 0; i < 9; i++) { | |
if (!(occupied & (1 << i))) { | |
actions_out[count++] = i; | |
} | |
} | |
return count; | |
} | |
// Check if the game has reached a terminal state | |
int is_terminal(const TicTacToeGame* game) | |
{ | |
if (check_winner(game) != 0) { | |
return 1; | |
} | |
// If all positions are occupied | |
if ((game->X | game->O) == 0b111111111) { | |
return 1; | |
} | |
return 0; | |
} | |
// Check for a winner using bitwise operations | |
char check_winner(const TicTacToeGame* game) | |
{ | |
for(int w = 0; w < 8; w++){ | |
if ((game->X & WINNING_MASKS[w]) == WINNING_MASKS[w]){ | |
return 'X'; | |
} | |
if ((game->O & WINNING_MASKS[w]) == WINNING_MASKS[w]){ | |
return 'O'; | |
} | |
} | |
return 0; | |
} | |
/********************************************************** | |
* MCTS NODE HELPERS | |
**********************************************************/ | |
// Create a new MCTS node | |
MCTSNode* create_node(const TicTacToeGame* state, MCTSNode* parent, char player, int action_from_parent) | |
{ | |
MCTSNode* node = (MCTSNode*)calloc(1, sizeof(MCTSNode)); | |
node->state = *state; | |
node->parent = parent; | |
node->num_children = 0; | |
node->wins = 0.0; | |
node->visits = 0; | |
node->player = player; | |
node->action_from_parent = action_from_parent; | |
node->num_untried_actions = get_available_actions(state, node->untried_actions); | |
return node; | |
} | |
// Recursively free a node and its children | |
void free_node(MCTSNode* node) | |
{ | |
if (!node) return; | |
for(int i = 0; i < node->num_children; i++){ | |
free_node(node->children[i]); | |
} | |
free(node); | |
} | |
/********************************************************** | |
* MCTS BOT LOGIC | |
**********************************************************/ | |
// Get the best action using MCTS | |
int mcts_get_action(const TicTacToeGame* game, const BotConfig* config, int* total_simulations) | |
{ | |
int available[9]; | |
int count = get_available_actions(game, available); | |
if (count == 0) { | |
return -1; | |
} | |
MCTSNode* root = create_node(game, NULL, game->current_player, -1); | |
for(int i = 0; i < config->num_simulations; i++){ | |
(*total_simulations)++; | |
MCTSNode* leaf = mcts_select(root, config->exploration_constant); | |
if (!is_terminal(&leaf->state)){ | |
leaf = mcts_expand(leaf); | |
} | |
double result = mcts_simulate(leaf, config); | |
mcts_backpropagate(leaf, result, leaf->player, config); | |
} | |
// Choose the child with the highest visit count | |
MCTSNode* best = NULL; | |
int best_visits = -1; | |
for(int i = 0; i < root->num_children; i++){ | |
if (root->children[i]->visits > best_visits){ | |
best_visits = root->children[i]->visits; | |
best = root->children[i]; | |
} | |
} | |
int best_action = best ? best->action_from_parent : -1; | |
free_node(root); | |
return best_action; | |
} | |
/********************************************************** | |
* MCTS INTERNAL ROUTINES | |
**********************************************************/ | |
// Select a node to explore using UCT | |
static MCTSNode* mcts_select(MCTSNode* node, double c) | |
{ | |
MCTSNode* current = node; | |
while(!is_terminal(¤t->state) && current->num_untried_actions == 0) { | |
current = mcts_best_child(current, c); | |
} | |
return current; | |
} | |
// Expand a node by adding a new child for an untried action | |
static MCTSNode* mcts_expand(MCTSNode* node) | |
{ | |
if (node->num_untried_actions == 0) { | |
return node; | |
} | |
int idx = node->num_untried_actions - 1; | |
int action = node->untried_actions[idx]; | |
node->num_untried_actions--; | |
TicTacToeGame new_state = apply_action(&node->state, action); | |
char child_player = new_state.current_player; | |
MCTSNode* child = create_node(&new_state, node, child_player, action); | |
node->children[node->num_children++] = child; | |
return child; | |
} | |
// Simulate a random playout from the given node | |
static double mcts_simulate(MCTSNode* node, const BotConfig* config) | |
{ | |
TicTacToeGame sim_state = node->state; | |
char rollout_player = node->player; | |
while(!is_terminal(&sim_state)){ | |
int moves[9]; | |
int c2 = get_available_actions(&sim_state, moves); | |
if (c2 == 0) break; // No available moves | |
int action = moves[rand() % c2]; | |
sim_state = apply_action(&sim_state, action); | |
} | |
char w = check_winner(&sim_state); | |
if (w == 0){ | |
return config->reward_draw; | |
} | |
else if (w == rollout_player){ | |
return config->reward_win; | |
} else { | |
return config->reward_loss; | |
} | |
} | |
// Backpropagate the simulation result up the tree | |
static void mcts_backpropagate(MCTSNode* node, double result, char rollout_player, const BotConfig* config) | |
{ | |
MCTSNode* current = node; | |
while(current != NULL){ | |
current->visits += 1; | |
if (current->player == rollout_player) { | |
current->wins += (1.0 - result); | |
} else { | |
current->wins += result; | |
} | |
current = current->parent; | |
} | |
} | |
// Select the best child based on UCT value | |
static MCTSNode* mcts_best_child(MCTSNode* node, double c) | |
{ | |
MCTSNode* best = NULL; | |
double best_uct = -1e30; | |
for(int i = 0; i < node->num_children; i++){ | |
MCTSNode* child = node->children[i]; | |
if (child->visits == 0) { | |
return child; | |
} | |
double exploitation = child->wins / (double)child->visits; | |
double exploration = sqrt(log((double)node->visits) / (double)child->visits); | |
double uct = exploitation + c * exploration; | |
if (uct > best_uct){ | |
best_uct = uct; | |
best = child; | |
} | |
} | |
return best; | |
} | |
/********************************************************** | |
* TOURNAMENT LOGIC WITH OPENMP OPTIMIZATION | |
**********************************************************/ | |
TournamentResult run_tournament(int num_games, | |
const BotConfig* agentA_config, | |
const BotConfig* agentB_config) | |
{ | |
TournamentResult tr = {0, 0, 0, 0}; | |
// Initialize OpenMP | |
#pragma omp parallel | |
{ | |
if (omp_get_thread_num() > num_of_threads) { | |
num_of_threads = omp_get_thread_num(); | |
} | |
// Each thread gets its own seed for rand_r | |
unsigned int seed = (unsigned int)time(NULL) ^ omp_get_thread_num(); | |
// Local accumulators for each thread | |
int local_winsA = 0; | |
int local_winsB = 0; | |
int local_draws = 0; | |
int local_total_simulations = 0; | |
// Parallelize the for loop | |
#pragma omp for schedule(static) | |
for(int g = 0; g < num_games; g++){ | |
int flip = rand_r(&seed) % 2; | |
BotConfig* botX; | |
BotConfig* botO; | |
char nameX; | |
char nameO; | |
if (flip == 0) { | |
botX = (BotConfig*)agentA_config; | |
botO = (BotConfig*)agentB_config; | |
nameX = 'A'; | |
nameO = 'B'; | |
} else { | |
botX = (BotConfig*)agentB_config; | |
botO = (BotConfig*)agentA_config; | |
nameX = 'B'; | |
nameO = 'A'; | |
} | |
TicTacToeGame game = make_empty_game(); | |
while(!is_terminal(&game)){ | |
if (game.current_player == 'X'){ | |
int action = mcts_get_action(&game, botX, &local_total_simulations); | |
if (action < 0) break; | |
game = apply_action(&game, action); | |
} else { | |
int action = mcts_get_action(&game, botO, &local_total_simulations); | |
if (action < 0) break; | |
game = apply_action(&game, action); | |
} | |
} | |
char w = check_winner(&game); | |
if (w == 'X'){ | |
if (nameX == 'A') local_winsA++; else local_winsB++; | |
} else if (w == 'O'){ | |
if (nameO == 'A') local_winsA++; else local_winsB++; | |
} else { | |
local_draws++; | |
} | |
} | |
// Accumulate the local results into the global result | |
#pragma omp atomic | |
tr.winsA += local_winsA; | |
#pragma omp atomic | |
tr.winsB += local_winsB; | |
#pragma omp atomic | |
tr.draws += local_draws; | |
#pragma omp atomic | |
tr.total_simulations += local_total_simulations; | |
} | |
return tr; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment