gelisam · July 29, 2023 07:23
diff --git a/ValueBasedDebugging.hs b/ValueBasedDebugging.hs
 -- A proof of concept for a value-based debugging technique: instead of
 -- debugging by imperatively stepping through the evaluation, we instead
 -- produce a value representing the evaluation tree.
 --
 -- This value can then be manipulated like any other value, e.g. instead adding
 -- a breakpoint and stepping until the breakpoint is hit, you can write an
 -- ordinary function which recurs into the evaluation tree until it finds a node
 -- of interest. This way, more complex conditions can be easily expressed, e.g.
 -- "find the smallest subtree in which a call of 'double' does not result in an
 -- output which is the double of its input".

 {-# LANGUAGE PatternGuards #-}
 module ValueBasedDebugging where

 import Data.Foldable (minimumBy)
 import Data.Ord (comparing)


 ---------------------
 -- Lambda calculus --
 ---------------------

 -- For this proof of concept, we use a simple lambda calculus with integers and
 -- addition. Everything in this section is standard.

 data Term
  = Var String
  | Lam String Term
  | App Term Term
  | IntLit Int
  | Add Term Term
  deriving (Eq, Show)

 data Value
  = Closure Env String Term
  | IntVal Int
  deriving (Eq, Show)

 type Env = [(String, Value)]

 -- Our language does not have a 'let' construct, but it is easy to define one in
 -- terms of the constructs it does have.
 let_
  :: String
  -> Term
  -> Term
  -> Term
 let_ x xDefinition body
  = App (Lam x body) xDefinition

 -- An example program with an intentional bug. We are trying to calculate
 -- @2*2*2*2 = 16@, but we accidentally defined @double x = x + 2@ instead of
 -- @double x = x + x@, so the result is @2+2+2+2 = 8@.
 example
  :: Term
 example
  = let_ "double" (Lam "x" (Add (Var "x")
                                (IntLit 2)))  -- bug!
  $ App (Var "double")
  $ App (Var "double")
  $ App (Var "double")
  $ IntLit 2

 -- |
 -- Evaluation can either succeed with a value, or fail with an error message.
 --
 -- >>> eval $ Add (IntLit 2) (IntLit 3)
 -- Right (IntVal 5)
 -- >>> eval example
 -- Right (IntVal 8)
 -- >>> eval $ Add (IntLit 2) (Var "x")
 -- Left "Variable \"x\" not in scope"
 evalWith
  :: Env
  -> Term
  -> Either String Value
 evalWith env (Var x) = case lookup x env of
  Just v -> do
    pure v
  Nothing -> do
    Left $ "Variable " ++ show x ++ " not in scope"
 evalWith env (Lam x body) = do
  pure $ Closure env x body
 evalWith env (App f e) = do
  fv <- evalWith env f
  ev <- evalWith env e
  case fv of
    Closure capturedEnv x body -> do
      evalWith ((x,ev) : capturedEnv) body
    IntVal n -> do
      Left $ "Integer " ++ show n ++ " is not a function"
 evalWith _ (IntLit n) = do
  pure $ IntVal n
 evalWith env (Add e1 e2) = do
  v1 <- evalWith env e1
  v2 <- evalWith env e2
  case v1 of
    IntVal n1 -> case v2 of
      IntVal n2 -> do
        pure $ IntVal (n1 + n2)
      Closure {} -> do
        error "Functions cannot be added"
    Closure {} -> do
      error "Functions cannot be added"

 eval
  :: Term
  -> Either String Value
 eval
  = evalWith []


 --------------------------
 -- Value-based debugger --
 --------------------------

 -- At each node of the evaluation tree, an input term is evaluated to an output
 -- value in an environment. It is possible to look at the output right away, or
 -- to dive down the 'details' field to look at the first evaluation step towards
 -- that output.
 data EvalTree = EvalTree
  { environment
      :: Env
  , input
      :: Term
  , output
      :: Either String Value
  , details
      :: EvalStep
  }
  deriving Show

 -- Some input terms are already values, and thus do not require any further
 -- evaluation. Other terms require the evaluation of multiple sub-terms, which
 -- is why the evaluation tree is a tree.
 --
 -- While 'eval' chooses to evaluate those sub-terms in a specific order, for
 -- debugging it is more convenient to allow the user to choose which sub-term to
 -- explore next.
 data EvalStep
  = EvalVar
  | EvalLam
  | EvalApp AppDetails
  | EvalIntLit
  | EvalAdd EvalTree EvalTree
  deriving Show

 -- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@,
 -- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation
 -- tree for @body@ in a extended environment in which @x@ is bound to the output
 -- of 'appArg'. However, another possibility is that the call is about to fail
 -- with a type error, because 'appFun' does not evaluate to a function. In that
 -- case, 'appArgName' and 'appResult' are both 'Nothing'.
 data AppDetails = AppDetails
  { appFun
      :: EvalTree
  , appArgName
      :: Maybe String
  , appArg
      :: EvalTree
  , appResult
      :: Maybe EvalTree
  }
  deriving Show

 -- |
 -- Evaluate an input term into an output value, while recording the evaluation
 -- tree. Note that the output value is computed using the 'evalWith' function
 -- from the previous section, _not_ by calculating the entire evaluation tree
 -- and then throwing away the debugging information. This allows the user to
 -- take advantage of laziness, by only generating debugging information for the
 -- parts of the evaluation tree that are actually needed.
 --
 -- For example, here is a computation consisting of two sub-expressions. We will
 -- generate debugging information for the @3 + 4@ sub-expression but not for the
 -- @1 + 2@ sub-expression.
 --
 -- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4)
 --
 -- Step into the overall expression:
 -- >>> EvalAdd node12 node34 = details node1234
 --
 -- Step over the @1 + 2@ sub-expression:
 -- >>> output node12
 -- Right (IntVal 3)
 --
 -- Step into the @3 + 4@ sub-expression:
 -- >>> EvalAdd node3 node4 = details node34
 --
 -- Evaluate the leaves:
 -- >>> output node3
 -- Right (IntVal 3)
 -- >>> output node4
 -- Right (IntVal 4)
 --
 -- Step out of the @3 + 4@ sub-expression:
 -- >>> output node34
 -- Right (IntVal 7)

 -- Step out of the overall expression:
 -- >>> output node1234
 -- Right (IntVal 10)
 debugWith
  :: Env
  -> Term
  -> EvalTree
 debugWith env term = EvalTree
  { environment
      = env
  , input
      = term
  , output
      = evalWith env term
  , details
      = go term
  }
  where
    go
      :: Term
      -> EvalStep
    go (Var {})
      = EvalVar
    go (Lam {})
      = EvalLam
    go (App funcTerm argTerm)
      = let funcNode = debugWith env funcTerm
            argNode = debugWith env argTerm
     in EvalApp $ AppDetails
          { appFun
              = funcNode
          , appArgName
              = do
                  Right (Closure _ argName _) <- pure $ output funcNode
                  pure argName
          , appArg
              = argNode
          , appResult
              = do
                  Right (Closure capturedEnv argName body) <- pure $ output funcNode
                  Right argValue <- pure $ output argNode
                  pure $ debugWith ((argName, argValue) : capturedEnv) body
          }
    go (IntLit {})
      = EvalIntLit
    go (Add e1 e2)
      = let node1 = debugWith env e1
            node2 = debugWith env e2
     in EvalAdd node1 node2

 debug
  :: Term
  -> EvalTree
 debug = debugWith []


 -----------------
 -- Breakpoints --
 -----------------

 exampleEvalTree
  :: EvalTree
 exampleEvalTree
  = debug example

 -- We've seen what the usual "step into", "step over", and "step out" commands
 -- of a debugger translate to in the value-based debugging world. What about
 -- breakpoints? In an imperative debugger, running up to a breakpoint means to
 -- automatically step into and out of sub-calls until a particular line is
 -- encountered. Or, for data breakpoints, until a particular variable is set to
 -- a value which matches a particular predicate. In a value-based debugger, we
 -- generalize the concept: a breakpoint is a predicate on the evaluation tree.
 -- The predicate can look at the line number (or it could if our term
 -- representation had line numbers), at the values of the variables in the
 -- environment, or at anything else which can be computed from the evaluation
 -- tree.

 -- |
 -- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3)
 -- 3
 allNodes
  :: EvalTree
  -> [EvalTree]
 allNodes node
  = node
  : case details node of
      EvalVar
        -> []
      EvalLam
        -> []
      EvalApp (AppDetails f _ arg result)
        -> allNodes f ++ allNodes arg ++ maybe [] allNodes result
      EvalIntLit
        -> []
      EvalAdd e1 e2
        -> allNodes e1 ++ allNodes e2

 -- |
 -- Look for points in the evaluation where a particular variable is set to a
 -- value of interest.
 --
 -- >>> nodes = allNodes exampleEvalTree
 -- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes
 -- >>> mapM_ (putStrLn . prettyEval) matches
 -- (x + 2) --> 6
 -- x --> 4
 -- 2 --> 2
 dataBreakpoint
  :: String
  -> (Value -> Bool)
  -> EvalTree -> Bool
 dataBreakpoint varName predicate node
  | Just value <- lookup varName (environment node)
    = predicate value
  | otherwise
    = False


 ------------------------
 -- Sub-program finder --
 ------------------------

 -- Being able to write an _arbitrary_ predicate, instead of being limited to
 -- line numbers and data breakpoints, unlocks a new world of debugging queries.
 --
 -- For example, our example program is producing the wrong answer, but let's
 -- pretend that the code is so large that we can't understand why just by
 -- looking at the code. Let's use the evaluation tree to simplify the problem,
 -- by finding the smaller sub-program which also produces the wrong answer.
 --
 -- To do this, we will use an oracle which always produces the right answer. At
 -- first glance, it might sound silly to assume that we have such an oracle,
 -- because if we had one, then we wouldn't need to debug our program, we could
 -- just trash our program and use our oracle instead. But oracles are actually a
 -- common technique for testing refactorings and optimizations: the oracle is
 -- simply the program before the refactoring, or the program without the
 -- optimization.

 -- |
 -- >>> isCallTo "double" $ App (Var "double") (IntLit 2)
 -- True
 -- >>> isCallTo "double" $ App (Var "triple") (IntLit 2)
 -- False
 isCallTo
  :: String
  -> Term -> Bool
 isCallTo expectedFuncName (App (Var actualFuncName) _)
  = actualFuncName == expectedFuncName
 isCallTo _ _
  = False

 -- |
 -- >>> mapM_ (putStrLn . prettyEval) doubleCalls
 -- (double (double (double 2))) --> 8
 -- (double (double 2)) --> 6
 -- (double 2) --> 4
 doubleCalls
  :: [EvalTree]
 doubleCalls
  = filter (isCallTo "double" . input)
  $ allNodes exampleEvalTree

 -- |
 -- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls
 -- >>> mapM_ (putStrLn . prettyEval) mismatches
 -- (double (double (double 2))) --> 8
 -- (double (double 2)) --> 6
 matchesOracle
  :: (Int -> Int)
  -> EvalTree -> Bool
 matchesOracle oracle node
  | EvalApp (AppDetails _ _ argNode _) <- details node
  , Right (IntVal argInt) <- output argNode
  , Right (IntVal actualOutput) <- output node
    = actualOutput == oracle argInt
  | otherwise
    = False

 -- |
 -- >>> size $ Add (IntLit 2) (IntLit 3)
 -- 3
 -- >>> map (size . input) doubleCalls
 -- [7,5,3]
 size
  :: Term
  -> Int
 size (Var {})
  = 1
 size (Lam _ body)
  = 1 + size body
 size (App f e)
  = 1 + size f + size e
 size (IntLit {})
  = 1
 size (Add e1 e2)
  = 1 + size e1 + size e2

 -- |
 -- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug
 -- (double (double 2)) --> 6
 smallestSubprogramDemonstratingTheBug
  :: EvalTree
 smallestSubprogramDemonstratingTheBug
  = minimumBy (comparing (size . input))
  $ filter (not . matchesOracle (* 2))
  $ doubleCalls

 -- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't
 -- value-based debugging amazing?


 ------------------------
 -- Minimal repro case --
 ------------------------

 -- But we are not done yet! We didn't perform a series of actions on an
 -- ephemeral instance of our program, we wrote a _function_, and functions are
 -- reusable. This means we can use this same function to debug the next program.
 -- Or a variant of the current program. That's right, since the evaluation tree
 -- is an ordinary value, this means we are not limited to debugging the program
 -- as it is, we can also generate alternative implementations on the fly and
 -- examine what would have been if we had written the program differently.
 --
 -- For example, let's say that the sub-program we found is still too complex.
 -- We can make systematic simplifications to the program and test whether it
 -- still exhibits the buggy behaviour. If any further simplification causes the
 -- buggy behaviour to go away, then we have found a minimal repro case.

 -- |
 -- Each result makes a single simplification to the code.
 -- Returns the empty list if no further simplifications are possible.
 --
 -- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3) 
 -- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
 -- 2
 -- 3
 -- (1 + 3)
 -- (2 + 2)
 --
 -- >>> simplerTerms = simplify example
 -- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
 -- ((\double -> (double (double 4))) (\x -> (x + 2)))
 -- ((\double -> (double (double (double 1)))) (\x -> (x + 2)))
 -- ((\double -> (double (double (double 2)))) (\x -> x))
 -- ((\double -> (double (double (double 2)))) (\x -> 2))
 -- ((\double -> (double (double (double 2)))) (\x -> (x + 1)))
 --
 -- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug
 -- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
 -- (double 4)
 -- (double (double 1))
 simplify
  :: Term
  -> [Term]
 simplify (Var {})
  = []
 simplify (Lam x body)
  = [Lam x body' | body' <- simplify body]
 simplify (App f@(Var "double") e@(IntLit n))
  = [IntLit (2 * n)]  -- replace a sub-call with the oracle's correct result
 ++ [ App f' e | f' <- simplify f ]
 ++ [ App f e' | e' <- simplify e ]
 simplify (App f e)
  = [ App f' e | f' <- simplify f ]
 ++ [ App f e' | e' <- simplify e ]
 simplify (IntLit n)
  | n > 0
    = [IntLit (n-1)]
  | n < 0
    = [IntLit (n+1)]
  | otherwise
    = []
 simplify (Add e1 e2)
  = [e1, e2]
 ++ [ Add e1' e2 | e1' <- simplify e1 ]
 ++ [ Add e1 e2' | e2' <- simplify e2 ]

 -- |
 -- >>> env = environment smallestSubprogramDemonstratingTheBug
 -- >>> term = input smallestSubprogramDemonstratingTheBug
 -- >>> incorrect = not . matchesOracle (* 2) . debugWith env
 -- >>> predicate term = isCallTo "double" term && incorrect term
 -- >>> reproCase = smallestReproCase predicate term
 -- >>> putStrLn $ prettyEval $ debugWith env reproCase
 -- (double 3) --> 5
 smallestReproCase
  :: (Term -> Bool)  -- True if the program exhibits the bug
  -> Term  -- the predicate must return True for this original term
  -> Term
 smallestReproCase predicate originalTerm
  = case filter predicate $ simplify originalTerm of
      simplerTerm : _
        -> smallestReproCase predicate simplerTerm
      []
        -> originalTerm

 -- Tada! We've found the smallest repro case. Isn't value-based debugging
 -- amazing? Let's implement something like this for a non-toy language!


 ---------------------
 -- Pretty printing --
 ---------------------

 -- Nothing interesting in the remainder of this file, just pretty printing
 -- functions to make the output of the examples more readable.

 -- |
 -- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3)
 -- (2 + 3)
 -- >>> putStrLn $ prettyTerm example
 -- ((\double -> (double (double (double 2)))) (\x -> (x + 2)))
 prettyTerm
  :: Term
  -> String
 prettyTerm (Var x)
  = x
 prettyTerm (Lam x body)
  = "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
 prettyTerm (App f e)
  = "(" ++ prettyTerm f ++ " " ++ prettyTerm e ++ ")"
 prettyTerm (IntLit n)
  = show n
 prettyTerm (Add e1 e2)
  = "(" ++ prettyTerm e1 ++ " + " ++ prettyTerm e2 ++ ")"

 -- |
 -- >>> putStrLn $ prettyValue $ IntVal 2
 -- 2
 -- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2)
 -- (\x -> (x + 2))
 prettyValue
  :: Value
  -> String
 prettyValue (Closure _ x body)
  = "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
 prettyValue (IntVal n)
  = show n

 -- |
 -- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3)
 -- 5
 -- >>> putStrLn $ prettyResult $ eval example
 -- 8
 prettyResult
  :: Either String Value
  -> String
 prettyResult (Right v)
  = prettyValue v
 prettyResult (Left err)
  = "Error: " ++ err

 -- |
 -- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3)
 -- (2 + 3) --> 5
 -- >>> putStrLn $ prettyEval $ debug example
 -- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8
 prettyEval
  :: EvalTree
  -> String
 prettyEval node
  = prettyTerm (input node) ++ " --> " ++ prettyResult (output node)


 main
  :: IO ()
 main = do
  putStrLn "typechecks."
	-- A proof of concept for a value-based debugging technique: instead of
	-- debugging by imperatively stepping through the evaluation, we instead
	-- produce a value representing the evaluation tree.
	--
	-- This value can then be manipulated like any other value, e.g. instead adding
	-- a breakpoint and stepping until the breakpoint is hit, you can write an
	-- ordinary function which recurs into the evaluation tree until it finds a node
	-- of interest. This way, more complex conditions can be easily expressed, e.g.
	-- "find the smallest subtree in which a call of 'double' does not result in an
	-- output which is the double of its input".

	{-# LANGUAGE PatternGuards #-}
	module ValueBasedDebugging where

	import Data.Foldable (minimumBy)
	import Data.Ord (comparing)


	---------------------
	-- Lambda calculus --
	---------------------

	-- For this proof of concept, we use a simple lambda calculus with integers and
	-- addition. Everything in this section is standard.

	data Term
	= Var String
	\| Lam String Term
	\| App Term Term
	\| IntLit Int
	\| Add Term Term
	deriving (Eq, Show)

	data Value
	= Closure Env String Term
	\| IntVal Int
	deriving (Eq, Show)

	type Env = [(String, Value)]

	-- Our language does not have a 'let' construct, but it is easy to define one in
	-- terms of the constructs it does have.
	let_
	:: String
	-> Term
	-> Term
	-> Term
	let_ x xDefinition body
	= App (Lam x body) xDefinition

	-- An example program with an intentional bug. We are trying to calculate
	-- @222*2 = 16@, but we accidentally defined @double x = x + 2@ instead of
	-- @double x = x + x@, so the result is @2+2+2+2 = 8@.
	example
	:: Term
	example
	= let_ "double" (Lam "x" (Add (Var "x")
	(IntLit 2))) -- bug!
	$ App (Var "double")
	$ App (Var "double")
	$ App (Var "double")
	$ IntLit 2

	-- \|
	-- Evaluation can either succeed with a value, or fail with an error message.
	--
	-- >>> eval $ Add (IntLit 2) (IntLit 3)
	-- Right (IntVal 5)
	-- >>> eval example
	-- Right (IntVal 8)
	-- >>> eval $ Add (IntLit 2) (Var "x")
	-- Left "Variable \"x\" not in scope"
	evalWith
	:: Env
	-> Term
	-> Either String Value
	evalWith env (Var x) = case lookup x env of
	Just v -> do
	pure v
	Nothing -> do
	Left $ "Variable " ++ show x ++ " not in scope"
	evalWith env (Lam x body) = do
	pure $ Closure env x body
	evalWith env (App f e) = do
	fv <- evalWith env f
	ev <- evalWith env e
	case fv of
	Closure capturedEnv x body -> do
	evalWith ((x,ev) : capturedEnv) body
	IntVal n -> do
	Left $ "Integer " ++ show n ++ " is not a function"
	evalWith _ (IntLit n) = do
	pure $ IntVal n
	evalWith env (Add e1 e2) = do
	v1 <- evalWith env e1
	v2 <- evalWith env e2
	case v1 of
	IntVal n1 -> case v2 of
	IntVal n2 -> do
	pure $ IntVal (n1 + n2)
	Closure {} -> do
	error "Functions cannot be added"
	Closure {} -> do
	error "Functions cannot be added"

	eval
	:: Term
	-> Either String Value
	eval
	= evalWith []


	--------------------------
	-- Value-based debugger --
	--------------------------

	-- At each node of the evaluation tree, an input term is evaluated to an output
	-- value in an environment. It is possible to look at the output right away, or
	-- to dive down the 'details' field to look at the first evaluation step towards
	-- that output.
	data EvalTree = EvalTree
	{ environment
	:: Env
	, input
	:: Term
	, output
	:: Either String Value
	, details
	:: EvalStep
	}
	deriving Show

	-- Some input terms are already values, and thus do not require any further
	-- evaluation. Other terms require the evaluation of multiple sub-terms, which
	-- is why the evaluation tree is a tree.
	--
	-- While 'eval' chooses to evaluate those sub-terms in a specific order, for
	-- debugging it is more convenient to allow the user to choose which sub-term to
	-- explore next.
	data EvalStep
	= EvalVar
	\| EvalLam
	\| EvalApp AppDetails
	\| EvalIntLit
	\| EvalAdd EvalTree EvalTree
	deriving Show

	-- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@,
	-- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation
	-- tree for @body@ in a extended environment in which @x@ is bound to the output
	-- of 'appArg'. However, another possibility is that the call is about to fail
	-- with a type error, because 'appFun' does not evaluate to a function. In that
	-- case, 'appArgName' and 'appResult' are both 'Nothing'.
	data AppDetails = AppDetails
	{ appFun
	:: EvalTree
	, appArgName
	:: Maybe String
	, appArg
	:: EvalTree
	, appResult
	:: Maybe EvalTree
	}
	deriving Show

	-- \|
	-- Evaluate an input term into an output value, while recording the evaluation
	-- tree. Note that the output value is computed using the 'evalWith' function
	-- from the previous section, _not_ by calculating the entire evaluation tree
	-- and then throwing away the debugging information. This allows the user to
	-- take advantage of laziness, by only generating debugging information for the
	-- parts of the evaluation tree that are actually needed.
	--
	-- For example, here is a computation consisting of two sub-expressions. We will
	-- generate debugging information for the @3 + 4@ sub-expression but not for the
	-- @1 + 2@ sub-expression.
	--
	-- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4)
	--
	-- Step into the overall expression:
	-- >>> EvalAdd node12 node34 = details node1234
	--
	-- Step over the @1 + 2@ sub-expression:
	-- >>> output node12
	-- Right (IntVal 3)
	--
	-- Step into the @3 + 4@ sub-expression:
	-- >>> EvalAdd node3 node4 = details node34
	--
	-- Evaluate the leaves:
	-- >>> output node3
	-- Right (IntVal 3)
	-- >>> output node4
	-- Right (IntVal 4)
	--
	-- Step out of the @3 + 4@ sub-expression:
	-- >>> output node34
	-- Right (IntVal 7)

	-- Step out of the overall expression:
	-- >>> output node1234
	-- Right (IntVal 10)
	debugWith
	:: Env
	-> Term
	-> EvalTree
	debugWith env term = EvalTree
	{ environment
	= env
	, input
	= term
	, output
	= evalWith env term
	, details
	= go term
	}
	where
	go
	:: Term
	-> EvalStep
	go (Var {})
	= EvalVar
	go (Lam {})
	= EvalLam
	go (App funcTerm argTerm)
	= let funcNode = debugWith env funcTerm
	argNode = debugWith env argTerm
	in EvalApp $ AppDetails
	{ appFun
	= funcNode
	, appArgName
	= do
	Right (Closure _ argName _) <- pure $ output funcNode
	pure argName
	, appArg
	= argNode
	, appResult
	= do
	Right (Closure capturedEnv argName body) <- pure $ output funcNode
	Right argValue <- pure $ output argNode
	pure $ debugWith ((argName, argValue) : capturedEnv) body
	}
	go (IntLit {})
	= EvalIntLit
	go (Add e1 e2)
	= let node1 = debugWith env e1
	node2 = debugWith env e2
	in EvalAdd node1 node2

	debug
	:: Term
	-> EvalTree
	debug = debugWith []


	-----------------
	-- Breakpoints --
	-----------------

	exampleEvalTree
	:: EvalTree
	exampleEvalTree
	= debug example

	-- We've seen what the usual "step into", "step over", and "step out" commands
	-- of a debugger translate to in the value-based debugging world. What about
	-- breakpoints? In an imperative debugger, running up to a breakpoint means to
	-- automatically step into and out of sub-calls until a particular line is
	-- encountered. Or, for data breakpoints, until a particular variable is set to
	-- a value which matches a particular predicate. In a value-based debugger, we
	-- generalize the concept: a breakpoint is a predicate on the evaluation tree.
	-- The predicate can look at the line number (or it could if our term
	-- representation had line numbers), at the values of the variables in the
	-- environment, or at anything else which can be computed from the evaluation
	-- tree.

	-- \|
	-- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3)
	-- 3
	allNodes
	:: EvalTree
	-> [EvalTree]
	allNodes node
	= node
	: case details node of
	EvalVar
	-> []
	EvalLam
	-> []
	EvalApp (AppDetails f _ arg result)
	-> allNodes f ++ allNodes arg ++ maybe [] allNodes result
	EvalIntLit
	-> []
	EvalAdd e1 e2
	-> allNodes e1 ++ allNodes e2

	-- \|
	-- Look for points in the evaluation where a particular variable is set to a
	-- value of interest.
	--
	-- >>> nodes = allNodes exampleEvalTree
	-- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes
	-- >>> mapM_ (putStrLn . prettyEval) matches
	-- (x + 2) --> 6
	-- x --> 4
	-- 2 --> 2
	dataBreakpoint
	:: String
	-> (Value -> Bool)
	-> EvalTree -> Bool
	dataBreakpoint varName predicate node
	\| Just value <- lookup varName (environment node)
	= predicate value
	\| otherwise
	= False


	------------------------
	-- Sub-program finder --
	------------------------

	-- Being able to write an _arbitrary_ predicate, instead of being limited to
	-- line numbers and data breakpoints, unlocks a new world of debugging queries.
	--
	-- For example, our example program is producing the wrong answer, but let's
	-- pretend that the code is so large that we can't understand why just by
	-- looking at the code. Let's use the evaluation tree to simplify the problem,
	-- by finding the smaller sub-program which also produces the wrong answer.
	--
	-- To do this, we will use an oracle which always produces the right answer. At
	-- first glance, it might sound silly to assume that we have such an oracle,
	-- because if we had one, then we wouldn't need to debug our program, we could
	-- just trash our program and use our oracle instead. But oracles are actually a
	-- common technique for testing refactorings and optimizations: the oracle is
	-- simply the program before the refactoring, or the program without the
	-- optimization.

	-- \|
	-- >>> isCallTo "double" $ App (Var "double") (IntLit 2)
	-- True
	-- >>> isCallTo "double" $ App (Var "triple") (IntLit 2)
	-- False
	isCallTo
	:: String
	-> Term -> Bool
	isCallTo expectedFuncName (App (Var actualFuncName) _)
	= actualFuncName == expectedFuncName
	isCallTo _ _
	= False

	-- \|
	-- >>> mapM_ (putStrLn . prettyEval) doubleCalls
	-- (double (double (double 2))) --> 8
	-- (double (double 2)) --> 6
	-- (double 2) --> 4
	doubleCalls
	:: [EvalTree]
	doubleCalls
	= filter (isCallTo "double" . input)
	$ allNodes exampleEvalTree

	-- \|
	-- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls
	-- >>> mapM_ (putStrLn . prettyEval) mismatches
	-- (double (double (double 2))) --> 8
	-- (double (double 2)) --> 6
	matchesOracle
	:: (Int -> Int)
	-> EvalTree -> Bool
	matchesOracle oracle node
	\| EvalApp (AppDetails _ _ argNode _) <- details node
	, Right (IntVal argInt) <- output argNode
	, Right (IntVal actualOutput) <- output node
	= actualOutput == oracle argInt
	\| otherwise
	= False

	-- \|
	-- >>> size $ Add (IntLit 2) (IntLit 3)
	-- 3
	-- >>> map (size . input) doubleCalls
	-- [7,5,3]
	size
	:: Term
	-> Int
	size (Var {})
	= 1
	size (Lam _ body)
	= 1 + size body
	size (App f e)
	= 1 + size f + size e
	size (IntLit {})
	= 1
	size (Add e1 e2)
	= 1 + size e1 + size e2

	-- \|
	-- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug
	-- (double (double 2)) --> 6
	smallestSubprogramDemonstratingTheBug
	:: EvalTree
	smallestSubprogramDemonstratingTheBug
	= minimumBy (comparing (size . input))
	$ filter (not . matchesOracle (* 2))
	$ doubleCalls

	-- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't
	-- value-based debugging amazing?


	------------------------
	-- Minimal repro case --
	------------------------

	-- But we are not done yet! We didn't perform a series of actions on an
	-- ephemeral instance of our program, we wrote a _function_, and functions are
	-- reusable. This means we can use this same function to debug the next program.
	-- Or a variant of the current program. That's right, since the evaluation tree
	-- is an ordinary value, this means we are not limited to debugging the program
	-- as it is, we can also generate alternative implementations on the fly and
	-- examine what would have been if we had written the program differently.
	--
	-- For example, let's say that the sub-program we found is still too complex.
	-- We can make systematic simplifications to the program and test whether it
	-- still exhibits the buggy behaviour. If any further simplification causes the
	-- buggy behaviour to go away, then we have found a minimal repro case.

	-- \|
	-- Each result makes a single simplification to the code.
	-- Returns the empty list if no further simplifications are possible.
	--
	-- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3)
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- 2
	-- 3
	-- (1 + 3)
	-- (2 + 2)
	--
	-- >>> simplerTerms = simplify example
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- ((\double -> (double (double 4))) (\x -> (x + 2)))
	-- ((\double -> (double (double (double 1)))) (\x -> (x + 2)))
	-- ((\double -> (double (double (double 2)))) (\x -> x))
	-- ((\double -> (double (double (double 2)))) (\x -> 2))
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 1)))
	--
	-- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug
	-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms
	-- (double 4)
	-- (double (double 1))
	simplify
	:: Term
	-> [Term]
	simplify (Var {})
	= []
	simplify (Lam x body)
	= [Lam x body' \| body' <- simplify body]
	simplify (App f@(Var "double") e@(IntLit n))
	= [IntLit (2 * n)] -- replace a sub-call with the oracle's correct result
	++ [ App f' e \| f' <- simplify f ]
	++ [ App f e' \| e' <- simplify e ]
	simplify (App f e)
	= [ App f' e \| f' <- simplify f ]
	++ [ App f e' \| e' <- simplify e ]
	simplify (IntLit n)
	\| n > 0
	= [IntLit (n-1)]
	\| n < 0
	= [IntLit (n+1)]
	\| otherwise
	= []
	simplify (Add e1 e2)
	= [e1, e2]
	++ [ Add e1' e2 \| e1' <- simplify e1 ]
	++ [ Add e1 e2' \| e2' <- simplify e2 ]

	-- \|
	-- >>> env = environment smallestSubprogramDemonstratingTheBug
	-- >>> term = input smallestSubprogramDemonstratingTheBug
	-- >>> incorrect = not . matchesOracle (* 2) . debugWith env
	-- >>> predicate term = isCallTo "double" term && incorrect term
	-- >>> reproCase = smallestReproCase predicate term
	-- >>> putStrLn $ prettyEval $ debugWith env reproCase
	-- (double 3) --> 5
	smallestReproCase
	:: (Term -> Bool) -- True if the program exhibits the bug
	-> Term -- the predicate must return True for this original term
	-> Term
	smallestReproCase predicate originalTerm
	= case filter predicate $ simplify originalTerm of
	simplerTerm : _
	-> smallestReproCase predicate simplerTerm
	[]
	-> originalTerm

	-- Tada! We've found the smallest repro case. Isn't value-based debugging
	-- amazing? Let's implement something like this for a non-toy language!


	---------------------
	-- Pretty printing --
	---------------------

	-- Nothing interesting in the remainder of this file, just pretty printing
	-- functions to make the output of the examples more readable.

	-- \|
	-- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3)
	-- (2 + 3)
	-- >>> putStrLn $ prettyTerm example
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 2)))
	prettyTerm
	:: Term
	-> String
	prettyTerm (Var x)
	= x
	prettyTerm (Lam x body)
	= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
	prettyTerm (App f e)
	= "(" ++ prettyTerm f ++ " " ++ prettyTerm e ++ ")"
	prettyTerm (IntLit n)
	= show n
	prettyTerm (Add e1 e2)
	= "(" ++ prettyTerm e1 ++ " + " ++ prettyTerm e2 ++ ")"

	-- \|
	-- >>> putStrLn $ prettyValue $ IntVal 2
	-- 2
	-- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2)
	-- (\x -> (x + 2))
	prettyValue
	:: Value
	-> String
	prettyValue (Closure _ x body)
	= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")"
	prettyValue (IntVal n)
	= show n

	-- \|
	-- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3)
	-- 5
	-- >>> putStrLn $ prettyResult $ eval example
	-- 8
	prettyResult
	:: Either String Value
	-> String
	prettyResult (Right v)
	= prettyValue v
	prettyResult (Left err)
	= "Error: " ++ err

	-- \|
	-- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3)
	-- (2 + 3) --> 5
	-- >>> putStrLn $ prettyEval $ debug example
	-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8
	prettyEval
	:: EvalTree
	-> String
	prettyEval node
	= prettyTerm (input node) ++ " --> " ++ prettyResult (output node)


	main
	:: IO ()
	main = do
	putStrLn "typechecks."