Last active
July 29, 2023 07:23
-
-
Save gelisam/820dae569c6b0747098a001dc6ae7eaf to your computer and use it in GitHub Desktop.
A proof of concept for a value-based debugging technique
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- A proof of concept for a value-based debugging technique: instead of | |
-- debugging by imperatively stepping through the evaluation, we instead | |
-- produce a value representing the evaluation tree. | |
-- | |
-- This value can then be manipulated like any other value, e.g. instead adding | |
-- a breakpoint and stepping until the breakpoint is hit, you can write an | |
-- ordinary function which recurs into the evaluation tree until it finds a node | |
-- of interest. This way, more complex conditions can be easily expressed, e.g. | |
-- "find the smallest subtree in which a call of 'double' does not result in an | |
-- output which is the double of its input". | |
{-# LANGUAGE PatternGuards #-} | |
module ValueBasedDebugging where | |
import Data.Foldable (minimumBy) | |
import Data.Ord (comparing) | |
--------------------- | |
-- Lambda calculus -- | |
--------------------- | |
-- For this proof of concept, we use a simple lambda calculus with integers and | |
-- addition. Everything in this section is standard. | |
data Term | |
= Var String | |
| Lam String Term | |
| App Term Term | |
| IntLit Int | |
| Add Term Term | |
deriving (Eq, Show) | |
data Value | |
= Closure Env String Term | |
| IntVal Int | |
deriving (Eq, Show) | |
type Env = [(String, Value)] | |
-- Our language does not have a 'let' construct, but it is easy to define one in | |
-- terms of the constructs it does have. | |
let_ | |
:: String | |
-> Term | |
-> Term | |
-> Term | |
let_ x xDefinition body | |
= App (Lam x body) xDefinition | |
-- An example program with an intentional bug. We are trying to calculate | |
-- @2*2*2*2 = 16@, but we accidentally defined @double x = x + 2@ instead of | |
-- @double x = x + x@, so the result is @2+2+2+2 = 8@. | |
example | |
:: Term | |
example | |
= let_ "double" (Lam "x" (Add (Var "x") | |
(IntLit 2))) -- bug! | |
$ App (Var "double") | |
$ App (Var "double") | |
$ App (Var "double") | |
$ IntLit 2 | |
-- | | |
-- Evaluation can either succeed with a value, or fail with an error message. | |
-- | |
-- >>> eval $ Add (IntLit 2) (IntLit 3) | |
-- Right (IntVal 5) | |
-- >>> eval example | |
-- Right (IntVal 8) | |
-- >>> eval $ Add (IntLit 2) (Var "x") | |
-- Left "Variable \"x\" not in scope" | |
evalWith | |
:: Env | |
-> Term | |
-> Either String Value | |
evalWith env (Var x) = case lookup x env of | |
Just v -> do | |
pure v | |
Nothing -> do | |
Left $ "Variable " ++ show x ++ " not in scope" | |
evalWith env (Lam x body) = do | |
pure $ Closure env x body | |
evalWith env (App f e) = do | |
fv <- evalWith env f | |
ev <- evalWith env e | |
case fv of | |
Closure capturedEnv x body -> do | |
evalWith ((x,ev) : capturedEnv) body | |
IntVal n -> do | |
Left $ "Integer " ++ show n ++ " is not a function" | |
evalWith _ (IntLit n) = do | |
pure $ IntVal n | |
evalWith env (Add e1 e2) = do | |
v1 <- evalWith env e1 | |
v2 <- evalWith env e2 | |
case v1 of | |
IntVal n1 -> case v2 of | |
IntVal n2 -> do | |
pure $ IntVal (n1 + n2) | |
Closure {} -> do | |
error "Functions cannot be added" | |
Closure {} -> do | |
error "Functions cannot be added" | |
eval | |
:: Term | |
-> Either String Value | |
eval | |
= evalWith [] | |
-------------------------- | |
-- Value-based debugger -- | |
-------------------------- | |
-- At each node of the evaluation tree, an input term is evaluated to an output | |
-- value in an environment. It is possible to look at the output right away, or | |
-- to dive down the 'details' field to look at the first evaluation step towards | |
-- that output. | |
data EvalTree = EvalTree | |
{ environment | |
:: Env | |
, input | |
:: Term | |
, output | |
:: Either String Value | |
, details | |
:: EvalStep | |
} | |
deriving Show | |
-- Some input terms are already values, and thus do not require any further | |
-- evaluation. Other terms require the evaluation of multiple sub-terms, which | |
-- is why the evaluation tree is a tree. | |
-- | |
-- While 'eval' chooses to evaluate those sub-terms in a specific order, for | |
-- debugging it is more convenient to allow the user to choose which sub-term to | |
-- explore next. | |
data EvalStep | |
= EvalVar | |
| EvalLam | |
| EvalApp AppDetails | |
| EvalIntLit | |
| EvalAdd EvalTree EvalTree | |
deriving Show | |
-- Ideally, 'appFun' evaluates to (the closure representation of) @Lam "x" body@, | |
-- in which case 'appArgName' is @Just "x"@ and 'appResult' is the evaluation | |
-- tree for @body@ in a extended environment in which @x@ is bound to the output | |
-- of 'appArg'. However, another possibility is that the call is about to fail | |
-- with a type error, because 'appFun' does not evaluate to a function. In that | |
-- case, 'appArgName' and 'appResult' are both 'Nothing'. | |
data AppDetails = AppDetails | |
{ appFun | |
:: EvalTree | |
, appArgName | |
:: Maybe String | |
, appArg | |
:: EvalTree | |
, appResult | |
:: Maybe EvalTree | |
} | |
deriving Show | |
-- | | |
-- Evaluate an input term into an output value, while recording the evaluation | |
-- tree. Note that the output value is computed using the 'evalWith' function | |
-- from the previous section, _not_ by calculating the entire evaluation tree | |
-- and then throwing away the debugging information. This allows the user to | |
-- take advantage of laziness, by only generating debugging information for the | |
-- parts of the evaluation tree that are actually needed. | |
-- | |
-- For example, here is a computation consisting of two sub-expressions. We will | |
-- generate debugging information for the @3 + 4@ sub-expression but not for the | |
-- @1 + 2@ sub-expression. | |
-- | |
-- >>> node1234 = debug $ (IntLit 1 `Add` IntLit 2) `Add` (IntLit 3 `Add` IntLit 4) | |
-- | |
-- Step into the overall expression: | |
-- >>> EvalAdd node12 node34 = details node1234 | |
-- | |
-- Step over the @1 + 2@ sub-expression: | |
-- >>> output node12 | |
-- Right (IntVal 3) | |
-- | |
-- Step into the @3 + 4@ sub-expression: | |
-- >>> EvalAdd node3 node4 = details node34 | |
-- | |
-- Evaluate the leaves: | |
-- >>> output node3 | |
-- Right (IntVal 3) | |
-- >>> output node4 | |
-- Right (IntVal 4) | |
-- | |
-- Step out of the @3 + 4@ sub-expression: | |
-- >>> output node34 | |
-- Right (IntVal 7) | |
-- Step out of the overall expression: | |
-- >>> output node1234 | |
-- Right (IntVal 10) | |
debugWith | |
:: Env | |
-> Term | |
-> EvalTree | |
debugWith env term = EvalTree | |
{ environment | |
= env | |
, input | |
= term | |
, output | |
= evalWith env term | |
, details | |
= go term | |
} | |
where | |
go | |
:: Term | |
-> EvalStep | |
go (Var {}) | |
= EvalVar | |
go (Lam {}) | |
= EvalLam | |
go (App funcTerm argTerm) | |
= let funcNode = debugWith env funcTerm | |
argNode = debugWith env argTerm | |
in EvalApp $ AppDetails | |
{ appFun | |
= funcNode | |
, appArgName | |
= do | |
Right (Closure _ argName _) <- pure $ output funcNode | |
pure argName | |
, appArg | |
= argNode | |
, appResult | |
= do | |
Right (Closure capturedEnv argName body) <- pure $ output funcNode | |
Right argValue <- pure $ output argNode | |
pure $ debugWith ((argName, argValue) : capturedEnv) body | |
} | |
go (IntLit {}) | |
= EvalIntLit | |
go (Add e1 e2) | |
= let node1 = debugWith env e1 | |
node2 = debugWith env e2 | |
in EvalAdd node1 node2 | |
debug | |
:: Term | |
-> EvalTree | |
debug = debugWith [] | |
----------------- | |
-- Breakpoints -- | |
----------------- | |
exampleEvalTree | |
:: EvalTree | |
exampleEvalTree | |
= debug example | |
-- We've seen what the usual "step into", "step over", and "step out" commands | |
-- of a debugger translate to in the value-based debugging world. What about | |
-- breakpoints? In an imperative debugger, running up to a breakpoint means to | |
-- automatically step into and out of sub-calls until a particular line is | |
-- encountered. Or, for data breakpoints, until a particular variable is set to | |
-- a value which matches a particular predicate. In a value-based debugger, we | |
-- generalize the concept: a breakpoint is a predicate on the evaluation tree. | |
-- The predicate can look at the line number (or it could if our term | |
-- representation had line numbers), at the values of the variables in the | |
-- environment, or at anything else which can be computed from the evaluation | |
-- tree. | |
-- | | |
-- >>> length $ allNodes $ debug $ Add (IntLit 2) (IntLit 3) | |
-- 3 | |
allNodes | |
:: EvalTree | |
-> [EvalTree] | |
allNodes node | |
= node | |
: case details node of | |
EvalVar | |
-> [] | |
EvalLam | |
-> [] | |
EvalApp (AppDetails f _ arg result) | |
-> allNodes f ++ allNodes arg ++ maybe [] allNodes result | |
EvalIntLit | |
-> [] | |
EvalAdd e1 e2 | |
-> allNodes e1 ++ allNodes e2 | |
-- | | |
-- Look for points in the evaluation where a particular variable is set to a | |
-- value of interest. | |
-- | |
-- >>> nodes = allNodes exampleEvalTree | |
-- >>> matches = filter (dataBreakpoint "x" (== IntVal 4)) nodes | |
-- >>> mapM_ (putStrLn . prettyEval) matches | |
-- (x + 2) --> 6 | |
-- x --> 4 | |
-- 2 --> 2 | |
dataBreakpoint | |
:: String | |
-> (Value -> Bool) | |
-> EvalTree -> Bool | |
dataBreakpoint varName predicate node | |
| Just value <- lookup varName (environment node) | |
= predicate value | |
| otherwise | |
= False | |
------------------------ | |
-- Sub-program finder -- | |
------------------------ | |
-- Being able to write an _arbitrary_ predicate, instead of being limited to | |
-- line numbers and data breakpoints, unlocks a new world of debugging queries. | |
-- | |
-- For example, our example program is producing the wrong answer, but let's | |
-- pretend that the code is so large that we can't understand why just by | |
-- looking at the code. Let's use the evaluation tree to simplify the problem, | |
-- by finding the smaller sub-program which also produces the wrong answer. | |
-- | |
-- To do this, we will use an oracle which always produces the right answer. At | |
-- first glance, it might sound silly to assume that we have such an oracle, | |
-- because if we had one, then we wouldn't need to debug our program, we could | |
-- just trash our program and use our oracle instead. But oracles are actually a | |
-- common technique for testing refactorings and optimizations: the oracle is | |
-- simply the program before the refactoring, or the program without the | |
-- optimization. | |
-- | | |
-- >>> isCallTo "double" $ App (Var "double") (IntLit 2) | |
-- True | |
-- >>> isCallTo "double" $ App (Var "triple") (IntLit 2) | |
-- False | |
isCallTo | |
:: String | |
-> Term -> Bool | |
isCallTo expectedFuncName (App (Var actualFuncName) _) | |
= actualFuncName == expectedFuncName | |
isCallTo _ _ | |
= False | |
-- | | |
-- >>> mapM_ (putStrLn . prettyEval) doubleCalls | |
-- (double (double (double 2))) --> 8 | |
-- (double (double 2)) --> 6 | |
-- (double 2) --> 4 | |
doubleCalls | |
:: [EvalTree] | |
doubleCalls | |
= filter (isCallTo "double" . input) | |
$ allNodes exampleEvalTree | |
-- | | |
-- >>> mismatches = filter (not . matchesOracle (* 2)) doubleCalls | |
-- >>> mapM_ (putStrLn . prettyEval) mismatches | |
-- (double (double (double 2))) --> 8 | |
-- (double (double 2)) --> 6 | |
matchesOracle | |
:: (Int -> Int) | |
-> EvalTree -> Bool | |
matchesOracle oracle node | |
| EvalApp (AppDetails _ _ argNode _) <- details node | |
, Right (IntVal argInt) <- output argNode | |
, Right (IntVal actualOutput) <- output node | |
= actualOutput == oracle argInt | |
| otherwise | |
= False | |
-- | | |
-- >>> size $ Add (IntLit 2) (IntLit 3) | |
-- 3 | |
-- >>> map (size . input) doubleCalls | |
-- [7,5,3] | |
size | |
:: Term | |
-> Int | |
size (Var {}) | |
= 1 | |
size (Lam _ body) | |
= 1 + size body | |
size (App f e) | |
= 1 + size f + size e | |
size (IntLit {}) | |
= 1 | |
size (Add e1 e2) | |
= 1 + size e1 + size e2 | |
-- | | |
-- >>> putStrLn $ prettyEval smallestSubprogramDemonstratingTheBug | |
-- (double (double 2)) --> 6 | |
smallestSubprogramDemonstratingTheBug | |
:: EvalTree | |
smallestSubprogramDemonstratingTheBug | |
= minimumBy (comparing (size . input)) | |
$ filter (not . matchesOracle (* 2)) | |
$ doubleCalls | |
-- Tada! We've found the smallest sub-program which demonstrates the bug. Isn't | |
-- value-based debugging amazing? | |
------------------------ | |
-- Minimal repro case -- | |
------------------------ | |
-- But we are not done yet! We didn't perform a series of actions on an | |
-- ephemeral instance of our program, we wrote a _function_, and functions are | |
-- reusable. This means we can use this same function to debug the next program. | |
-- Or a variant of the current program. That's right, since the evaluation tree | |
-- is an ordinary value, this means we are not limited to debugging the program | |
-- as it is, we can also generate alternative implementations on the fly and | |
-- examine what would have been if we had written the program differently. | |
-- | |
-- For example, let's say that the sub-program we found is still too complex. | |
-- We can make systematic simplifications to the program and test whether it | |
-- still exhibits the buggy behaviour. If any further simplification causes the | |
-- buggy behaviour to go away, then we have found a minimal repro case. | |
-- | | |
-- Each result makes a single simplification to the code. | |
-- Returns the empty list if no further simplifications are possible. | |
-- | |
-- >>> simplerTerms = simplify $ Add (IntLit 2) (IntLit 3) | |
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms | |
-- 2 | |
-- 3 | |
-- (1 + 3) | |
-- (2 + 2) | |
-- | |
-- >>> simplerTerms = simplify example | |
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms | |
-- ((\double -> (double (double 4))) (\x -> (x + 2))) | |
-- ((\double -> (double (double (double 1)))) (\x -> (x + 2))) | |
-- ((\double -> (double (double (double 2)))) (\x -> x)) | |
-- ((\double -> (double (double (double 2)))) (\x -> 2)) | |
-- ((\double -> (double (double (double 2)))) (\x -> (x + 1))) | |
-- | |
-- >>> simplerTerms = simplify $ input smallestSubprogramDemonstratingTheBug | |
-- >>> mapM_ (putStrLn . prettyTerm) simplerTerms | |
-- (double 4) | |
-- (double (double 1)) | |
simplify | |
:: Term | |
-> [Term] | |
simplify (Var {}) | |
= [] | |
simplify (Lam x body) | |
= [Lam x body' | body' <- simplify body] | |
simplify (App f@(Var "double") e@(IntLit n)) | |
= [IntLit (2 * n)] -- replace a sub-call with the oracle's correct result | |
++ [ App f' e | f' <- simplify f ] | |
++ [ App f e' | e' <- simplify e ] | |
simplify (App f e) | |
= [ App f' e | f' <- simplify f ] | |
++ [ App f e' | e' <- simplify e ] | |
simplify (IntLit n) | |
| n > 0 | |
= [IntLit (n-1)] | |
| n < 0 | |
= [IntLit (n+1)] | |
| otherwise | |
= [] | |
simplify (Add e1 e2) | |
= [e1, e2] | |
++ [ Add e1' e2 | e1' <- simplify e1 ] | |
++ [ Add e1 e2' | e2' <- simplify e2 ] | |
-- | | |
-- >>> env = environment smallestSubprogramDemonstratingTheBug | |
-- >>> term = input smallestSubprogramDemonstratingTheBug | |
-- >>> incorrect = not . matchesOracle (* 2) . debugWith env | |
-- >>> predicate term = isCallTo "double" term && incorrect term | |
-- >>> reproCase = smallestReproCase predicate term | |
-- >>> putStrLn $ prettyEval $ debugWith env reproCase | |
-- (double 3) --> 5 | |
smallestReproCase | |
:: (Term -> Bool) -- True if the program exhibits the bug | |
-> Term -- the predicate must return True for this original term | |
-> Term | |
smallestReproCase predicate originalTerm | |
= case filter predicate $ simplify originalTerm of | |
simplerTerm : _ | |
-> smallestReproCase predicate simplerTerm | |
[] | |
-> originalTerm | |
-- Tada! We've found the smallest repro case. Isn't value-based debugging | |
-- amazing? Let's implement something like this for a non-toy language! | |
--------------------- | |
-- Pretty printing -- | |
--------------------- | |
-- Nothing interesting in the remainder of this file, just pretty printing | |
-- functions to make the output of the examples more readable. | |
-- | | |
-- >>> putStrLn $ prettyTerm $ Add (IntLit 2) (IntLit 3) | |
-- (2 + 3) | |
-- >>> putStrLn $ prettyTerm example | |
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) | |
prettyTerm | |
:: Term | |
-> String | |
prettyTerm (Var x) | |
= x | |
prettyTerm (Lam x body) | |
= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")" | |
prettyTerm (App f e) | |
= "(" ++ prettyTerm f ++ " " ++ prettyTerm e ++ ")" | |
prettyTerm (IntLit n) | |
= show n | |
prettyTerm (Add e1 e2) | |
= "(" ++ prettyTerm e1 ++ " + " ++ prettyTerm e2 ++ ")" | |
-- | | |
-- >>> putStrLn $ prettyValue $ IntVal 2 | |
-- 2 | |
-- >>> putStrLn $ prettyValue $ Closure [] "x" $ Add (Var "x") (IntLit 2) | |
-- (\x -> (x + 2)) | |
prettyValue | |
:: Value | |
-> String | |
prettyValue (Closure _ x body) | |
= "(\\" ++ x ++ " -> " ++ prettyTerm body ++ ")" | |
prettyValue (IntVal n) | |
= show n | |
-- | | |
-- >>> putStrLn $ prettyResult $ eval $ Add (IntLit 2) (IntLit 3) | |
-- 5 | |
-- >>> putStrLn $ prettyResult $ eval example | |
-- 8 | |
prettyResult | |
:: Either String Value | |
-> String | |
prettyResult (Right v) | |
= prettyValue v | |
prettyResult (Left err) | |
= "Error: " ++ err | |
-- | | |
-- >>> putStrLn $ prettyEval $ debug $ Add (IntLit 2) (IntLit 3) | |
-- (2 + 3) --> 5 | |
-- >>> putStrLn $ prettyEval $ debug example | |
-- ((\double -> (double (double (double 2)))) (\x -> (x + 2))) --> 8 | |
prettyEval | |
:: EvalTree | |
-> String | |
prettyEval node | |
= prettyTerm (input node) ++ " --> " ++ prettyResult (output node) | |
main | |
:: IO () | |
main = do | |
putStrLn "typechecks." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment