kanzure · April 13, 2015 18:13
diff --git a/README.md b/README.md
diff --git a/ospd.scm b/ospd.scm
 ; Just to start with, code it as a Schelling coordination game.
 ; Added correct scores taken from the real OSPD wiki page.

 ; Added the assumption that the agent maximizes utility via planning-as-inference, passing the reward through a sigmoid to turn it into a probability.
 ; Added the assumption that the opponent chooses randomly when depth hits zero.
 ;
 ; Then account for the limited levels of Sicilian Reasoning by running each 
 ;   agent with a starting depth of 2.


 ; Cooperate = true, Defect = false
 ; Payoff table for one round of the prisoner's dilemma.
 ; alice, bob: the two players' moves (a true value cooperates, false defects).
 ; Returns a pair (alice-payoff . bob-payoff).
 (define (prisoners-dilemma alice bob)
   (if alice
       (if bob
           '(-1 . -1)      ; both cooperate
           '(-3 . 0))      ; alice cooperates, bob defects
       (if bob
           '(0 . -3)       ; alice defects, bob cooperates
           '(-2 . -2))))   ; both defect

 ; Logistic function: 1 / (1 + e^(-x)).
 (define (sigmoid x)
   (let ((decay (exp (- x))))
     (/ 1 (+ 1 decay))))
 ; Turns a numeric reward r into a random boolean by flipping a coin
 ; weighted by (sigmoid r).
 (define (reward-distribution r)
   (let ((p-accept (sigmoid r)))
     (flip p-accept)))

 ; Agent facing an opponent whose move is a single fair coin flip, drawn
 ; once before the query runs. The agent proposes its own move by coin flip
 ; and the query accepts it when the draw from reward-distribution on the
 ; agent's own payoff (the car of the payoff pair) comes up true.
 ; Returns the accepted move.
 (define (naive-prisoners-dilemma)
   (let ((their-move (flip)))
     (rejection-query
       (define my-move (flip))
       (define my-payoff (car (prisoners-dilemma my-move their-move)))
       my-move
       (reward-distribution my-payoff))))

 ; Agent that models its opponent as a copy of itself reasoning one level
 ; shallower; once depth reaches 0 (or below) the opponent is a fair coin flip.
 ; The query accepts a candidate move only when it equals the opponent's
 ; move (the coordination-game framing) and the draw from
 ; reward-distribution on the agent's own payoff also comes up true.
 ; depth: number of levels of nested opponent modelling.
 ; Returns the accepted move.
 (define (smart-prisoners-dilemma depth)
   ; (<= depth 0) rather than (= depth 0): a negative or fractional depth
   ; would otherwise step past zero and recurse without end.
   (let ((opponent-move (if (<= depth 0)
                            (flip)
                            (smart-prisoners-dilemma (- depth 1)))))
     (rejection-query
       (define move (flip))
       move
       (and (equal? move opponent-move)
            (reward-distribution (car (prisoners-dilemma move opponent-move)))))))

 ; Draw 100 independent moves from the depth-2 agent and histogram them.
 (let ((samples (repeat 100 (lambda () (smart-prisoners-dilemma 2)))))
   (hist samples))
	; Just to start with, code it as a Schelling coordination game.
	; Added correct scores taken from the real OSPD wiki page.

	; Added the assumption that the agent maximizes utility via planning-as-inference, passing the reward through a sigmoid to turn it into a probability.
	; Added the assumption that the opponent chooses randomly when depth hits zero.
	;
	; Then account for the limited levels of Sicilian Reasoning by running each
	; agent with a starting depth of 2.


	; Cooperate = true, Defect = false
	; Payoff table for one round of the prisoner's dilemma.
	; alice, bob: the two players' moves (a true value cooperates, false defects).
	; Returns a pair (alice-payoff . bob-payoff).
	(define (prisoners-dilemma alice bob)
	  (if alice
	      (if bob
	          '(-1 . -1)      ; both cooperate
	          '(-3 . 0))      ; alice cooperates, bob defects
	      (if bob
	          '(0 . -3)       ; alice defects, bob cooperates
	          '(-2 . -2))))   ; both defect

	; Logistic function: 1 / (1 + e^(-x)).
	(define (sigmoid x)
	  (let ((decay (exp (- x))))
	    (/ 1 (+ 1 decay))))
	; Turns a numeric reward r into a random boolean by flipping a coin
	; weighted by (sigmoid r).
	(define (reward-distribution r)
	  (let ((p-accept (sigmoid r)))
	    (flip p-accept)))

	; Agent facing an opponent whose move is a single fair coin flip, drawn
	; once before the query runs. The agent proposes its own move by coin flip
	; and the query accepts it when the draw from reward-distribution on the
	; agent's own payoff (the car of the payoff pair) comes up true.
	; Returns the accepted move.
	(define (naive-prisoners-dilemma)
	  (let ((their-move (flip)))
	    (rejection-query
	      (define my-move (flip))
	      (define my-payoff (car (prisoners-dilemma my-move their-move)))
	      my-move
	      (reward-distribution my-payoff))))

	; Agent that models its opponent as a copy of itself reasoning one level
	; shallower; once depth reaches 0 (or below) the opponent is a fair coin flip.
	; The query accepts a candidate move only when it equals the opponent's
	; move (the coordination-game framing) and the draw from
	; reward-distribution on the agent's own payoff also comes up true.
	; depth: number of levels of nested opponent modelling.
	; Returns the accepted move.
	(define (smart-prisoners-dilemma depth)
	  ; (<= depth 0) rather than (= depth 0): a negative or fractional depth
	  ; would otherwise step past zero and recurse without end.
	  (let ((opponent-move (if (<= depth 0)
	                           (flip)
	                           (smart-prisoners-dilemma (- depth 1)))))
	    (rejection-query
	      (define move (flip))
	      move
	      (and (equal? move opponent-move)
	           (reward-distribution (car (prisoners-dilemma move opponent-move)))))))

	; Draw 100 independent moves from the depth-2 agent and histogram them.
	(let ((samples (repeat 100 (lambda () (smart-prisoners-dilemma 2)))))
	  (hist samples))