Created
September 27, 2021 15:32
-
-
Save lukego/b7183e95955d3fddd55b1c8c2104ae06 to your computer and use it in GitHub Desktop.
UCT Monte Carlo Tree Search in Lisp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; UCT Monte Carlo Tree Search | |
(defpackage :uct | |
(:use :common-lisp :alexandria :serapeum) | |
(:import-from :nuddy #:⊥)) | |
(in-package :uct) | |
(defstruct search-tree | |
(mode :tree :type (member :tree :playout)) | |
(root-node (make-root-node) :type node) | |
(current-node ⊥ :type node) | |
(exploration-bias 1 :type real)) | |
(defstruct node | |
(parent nil :type (or null node)) | |
(children (dict) :type hash-table) | |
(action ⊥ :type t) | |
(n-visits 0 :type (integer 0 *)) | |
(average-reward 0 :type real)) | |
(defvar *tree* nil | |
"Current search tree.") | |
(defun tree () | |
(or *tree* (setf *tree* (make-tree)))) | |
(defun make-tree () | |
(let ((root (make-root-node))) | |
(make-search-tree :root-node root | |
:current-node root))) | |
(defun make-root-node () | |
(make-node :parent nil :action nil)) | |
(defmacro node () | |
`(search-tree-current-node (tree))) | |
(defun choose (actions) | |
"Choose an element of ACTIONS." | |
(ecase (search-tree-mode (tree)) | |
(:playout | |
(random-elt actions)) | |
(:tree | |
(if-let (new-actions (remove-if #'has-node? actions)) | |
;; Expand | |
(let* ((chosen-action (random-elt new-actions)) | |
(new-node (make-node :parent (node) :action chosen-action))) | |
(setf (@ (node-children (node)) chosen-action) new-node) | |
(setf (node) new-node) | |
(setf (search-tree-mode (tree)) :playout) | |
chosen-action) | |
;; Tree search | |
(node-action (setf (node) (best-child actions))))))) | |
(defun has-node? (action) | |
"Return true if ACTION is already represented by a child node." | |
(true (gethash action (node-children (node))))) | |
(defun best-child (actions) | |
"Return the child node with the greatest estimated value." | |
(extremum (eligible-children actions) #'> | |
:key #'estimated-value)) | |
(defun eligible-children (actions) | |
"Return the list of child nodes that represent a member of ACTIONS." | |
(filter (lambda (x) (member (node-action x) actions :test #'equal)) | |
(hash-table-values (node-children (node))))) | |
(defun estimated-value (node) | |
"Return the estimated value of exploring NODE." | |
(+ (node-average-reward node) | |
(* (search-tree-exploration-bias (tree)) | |
(sqrt (/ (log (node-n-visits node)) | |
(node-n-visits (node-parent node))))))) | |
(defun finish (reward) | |
"Finish the current iteration and offer REWARD for the final result." | |
(loop for node = (node) then (node-parent node) | |
while node | |
do (incf (node-n-visits node)) | |
do (incf (node-average-reward node) | |
(/ (- reward (node-average-reward node)) | |
(node-n-visits node)))) | |
(setf (search-tree-current-node (tree)) (search-tree-root-node (tree))) | |
(setf (search-tree-mode (tree)) :tree)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment