Skip to content

Instantly share code, notes, and snippets.

@lukego
Created September 27, 2021 15:32
Show Gist options
  • Save lukego/b7183e95955d3fddd55b1c8c2104ae06 to your computer and use it in GitHub Desktop.
Save lukego/b7183e95955d3fddd55b1c8c2104ae06 to your computer and use it in GitHub Desktop.
UCT Monte Carlo Tree Search in Lisp
;;; UCT Monte Carlo Tree Search
(defpackage :uct
(:use :common-lisp :alexandria :serapeum)
(:import-from :nuddy #:⊥))
(in-package :uct)
(defstruct search-tree
(mode :tree :type (member :tree :playout))
(root-node (make-root-node) :type node)
(current-node ⊥ :type node)
(exploration-bias 1 :type real))
(defstruct node
(parent nil :type (or null node))
(children (dict) :type hash-table)
(action ⊥ :type t)
(n-visits 0 :type (integer 0 *))
(average-reward 0 :type real))
(defvar *tree* nil
"Current search tree.")
(defun tree ()
(or *tree* (setf *tree* (make-tree))))
(defun make-tree ()
(let ((root (make-root-node)))
(make-search-tree :root-node root
:current-node root)))
(defun make-root-node ()
(make-node :parent nil :action nil))
(defmacro node ()
`(search-tree-current-node (tree)))
(defun choose (actions)
"Choose an element of ACTIONS."
(ecase (search-tree-mode (tree))
(:playout
(random-elt actions))
(:tree
(if-let (new-actions (remove-if #'has-node? actions))
;; Expand
(let* ((chosen-action (random-elt new-actions))
(new-node (make-node :parent (node) :action chosen-action)))
(setf (@ (node-children (node)) chosen-action) new-node)
(setf (node) new-node)
(setf (search-tree-mode (tree)) :playout)
chosen-action)
;; Tree search
(node-action (setf (node) (best-child actions)))))))
(defun has-node? (action)
"Return true if ACTION is already represented by a child node."
(true (gethash action (node-children (node)))))
(defun best-child (actions)
"Return the child node with the greatest estimated value."
(extremum (eligible-children actions) #'>
:key #'estimated-value))
(defun eligible-children (actions)
"Return the list of child nodes that represent a member of ACTIONS."
(filter (lambda (x) (member (node-action x) actions :test #'equal))
(hash-table-values (node-children (node)))))
(defun estimated-value (node)
"Return the estimated value of exploring NODE."
(+ (node-average-reward node)
(* (search-tree-exploration-bias (tree))
(sqrt (/ (log (node-n-visits node))
(node-n-visits (node-parent node)))))))
(defun finish (reward)
"Finish the current iteration and offer REWARD for the final result."
(loop for node = (node) then (node-parent node)
while node
do (incf (node-n-visits node))
do (incf (node-average-reward node)
(/ (- reward (node-average-reward node))
(node-n-visits node))))
(setf (search-tree-current-node (tree)) (search-tree-root-node (tree)))
(setf (search-tree-mode (tree)) :tree))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment