lukego · September 27, 2021 15:32
diff --git a/uct.lisp b/uct.lisp
 ;;; UCT Monte Carlo Tree Search

 (defpackage :uct
  (:use :common-lisp :alexandria :serapeum)
  (:import-from :nuddy #:⊥))

 (in-package :uct)

 (defstruct search-tree
  (mode             :tree            :type (member :tree :playout))
  (root-node        (make-root-node) :type node)
  (current-node     ⊥ :type node)
  (exploration-bias 1 :type real))

 (defstruct node
  (parent         nil    :type (or null node))
  (children       (dict) :type hash-table)
  (action         ⊥      :type t)
  (n-visits       0      :type (integer 0 *))
  (average-reward 0      :type real))

 (defvar *tree* nil
  "Current search tree.")

 (defun tree ()
  (or *tree* (setf *tree* (make-tree))))

 (defun make-tree ()
  (let ((root (make-root-node)))
    (make-search-tree :root-node root
                      :current-node root)))

 (defun make-root-node ()
  (make-node :parent nil :action nil))

 (defmacro node ()
  `(search-tree-current-node (tree)))

 (defun choose (actions)
  "Choose an element of ACTIONS."
  (ecase (search-tree-mode (tree))
    (:playout
     (random-elt actions))
    (:tree
     (if-let (new-actions (remove-if #'has-node? actions))
       ;; Expand
       (let* ((chosen-action (random-elt new-actions))
              (new-node (make-node :parent (node) :action chosen-action)))
         (setf (@ (node-children (node)) chosen-action) new-node)
         (setf (node) new-node)
         (setf (search-tree-mode (tree)) :playout)
         chosen-action)
       ;; Tree search
       (node-action (setf (node) (best-child actions)))))))

 (defun has-node? (action)
  "Return true if ACTION is already represented by a child node."
  (true (gethash action (node-children (node)))))

 (defun best-child (actions)
  "Return the child node with the greatest estimated value."
  (extremum (eligible-children actions) #'>
            :key #'estimated-value))

 (defun eligible-children (actions)
  "Return the list of child nodes that represent a member of ACTIONS."
  (filter (lambda (x) (member (node-action x) actions :test #'equal))
          (hash-table-values (node-children (node)))))

 (defun estimated-value (node)
  "Return the estimated value of exploring NODE."
  (+ (node-average-reward node)
     (* (search-tree-exploration-bias (tree))
        (sqrt (/ (log (node-n-visits node))
                 (node-n-visits (node-parent node)))))))

 (defun finish (reward)
  "Finish the current iteration and offer REWARD for the final result."
  (loop for node = (node) then (node-parent node)
        while node
        do (incf (node-n-visits node))
        do (incf (node-average-reward node)
                 (/ (- reward (node-average-reward node))
                    (node-n-visits node))))
  (setf (search-tree-current-node (tree)) (search-tree-root-node (tree)))
  (setf (search-tree-mode (tree)) :tree))
	;;; UCT Monte Carlo Tree Search

	(defpackage :uct
	(:use :common-lisp :alexandria :serapeum)
	(:import-from :nuddy #:⊥))

	(in-package :uct)

	(defstruct search-tree
	(mode :tree :type (member :tree :playout))
	(root-node (make-root-node) :type node)
	(current-node ⊥ :type node)
	(exploration-bias 1 :type real))

	(defstruct node
	(parent nil :type (or null node))
	(children (dict) :type hash-table)
	(action ⊥ :type t)
	(n-visits 0 :type (integer 0 *))
	(average-reward 0 :type real))

	(defvar tree nil
	"Current search tree.")

	(defun tree ()
	(or tree (setf tree (make-tree))))

	(defun make-tree ()
	(let ((root (make-root-node)))
	(make-search-tree :root-node root
	:current-node root)))

	(defun make-root-node ()
	(make-node :parent nil :action nil))

	(defmacro node ()
	`(search-tree-current-node (tree)))

	(defun choose (actions)
	"Choose an element of ACTIONS."
	(ecase (search-tree-mode (tree))
	(:playout
	(random-elt actions))
	(:tree
	(if-let (new-actions (remove-if #'has-node? actions))
	;; Expand
	(let* ((chosen-action (random-elt new-actions))
	(new-node (make-node :parent (node) :action chosen-action)))
	(setf (@ (node-children (node)) chosen-action) new-node)
	(setf (node) new-node)
	(setf (search-tree-mode (tree)) :playout)
	chosen-action)
	;; Tree search
	(node-action (setf (node) (best-child actions)))))))

	(defun has-node? (action)
	"Return true if ACTION is already represented by a child node."
	(true (gethash action (node-children (node)))))

	(defun best-child (actions)
	"Return the child node with the greatest estimated value."
	(extremum (eligible-children actions) #'>
	:key #'estimated-value))

	(defun eligible-children (actions)
	"Return the list of child nodes that represent a member of ACTIONS."
	(filter (lambda (x) (member (node-action x) actions :test #'equal))
	(hash-table-values (node-children (node)))))

	(defun estimated-value (node)
	"Return the estimated value of exploring NODE."
	(+ (node-average-reward node)
	(* (search-tree-exploration-bias (tree))
	(sqrt (/ (log (node-n-visits node))
	(node-n-visits (node-parent node)))))))

	(defun finish (reward)
	"Finish the current iteration and offer REWARD for the final result."
	(loop for node = (node) then (node-parent node)
	while node
	do (incf (node-n-visits node))
	do (incf (node-average-reward node)
	(/ (- reward (node-average-reward node))
	(node-n-visits node))))
	(setf (search-tree-current-node (tree)) (search-tree-root-node (tree)))
	(setf (search-tree-mode (tree)) :tree))