Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save VictorTaelin/632067b6849c619ef0408347150a3a5e to your computer and use it in GitHub Desktop.
Save VictorTaelin/632067b6849c619ef0408347150a3a5e to your computer and use it in GitHub Desktop.
parse loop debugging prompt: gpt-oss vs kimi k2 vs qwen3 coder vs Qwen3 235B A22B Thinking 2507
-- TASK: create a Parser for Kind7.
-- - don't be inventive, just use a simple approach that is guaranteed to work
-- - keep it simple and idiomatic
-- - include nice error messages
-- On the 'parseApp parser:
-- - support (f x y z) as an alias to (((f x) y) z)
-- - do so by parsing '(', then terms until you hit a ')', then foldl to build the App
-- Note on operators:
-- - syntaxes like 'a,b', 'a<>b', 'a->b', 'a:b', etc., MUST be treated as operators
-- - there is no precedence, and operators always fold right: a,b,c,d → a,(b,(c,d))
-- - implemented his using as imple logic:
-- - the term parser is split in two: parseTermIni and parseTermEnd
-- - the parseTermIni handles all core syntaxes (i.e., non-operators)
-- - the parseTermEnd receives the Term parsed by parseTermIni, and attempts to parse an operator
-- - this allows fast parsing with less backtracking and better error messages
-- Note on error messages:
-- - it should be inspired (but not identical) on this old error implementation:
-- import Highlight (highlightError, highlight)
-- showParseError :: String -> String -> P.ParseError -> IO ()
-- showParseError filename input err = do
-- let pos = errorPos err
-- let lin = sourceLine pos
-- let col = sourceColumn pos
-- let msg = extractExpectedTokens err
-- putStrLn $ setSGRCode [SetConsoleIntensity BoldIntensity] ++ "\nPARSE_ERROR" ++ setSGRCode [Reset]
-- putStrLn $ "- expected: " ++ msg
-- putStrLn $ "- detected:"
-- putStrLn $ highlightError (lin, col) (lin, col + 1) input
-- putStrLn $ setSGRCode [SetUnderlining SingleUnderline] ++ filename ++
-- setSGRCode [Reset] ++ " " ++ show lin ++ ":" ++ show col
-- simplify and adapt it to your own implementation
-- use the Highlight import. IT ALREADY EXISTS, DO NOT RE-CREATE IT.
-- About lambdas:
-- - do NOT handle shadowing or substitution in any way
-- - just parse the input strings into the terms
-- - this is handled elsewhre - separation of concerns
-- remember:
-- - keep this file as simple, elegant and idiomatic.
-- - do not write anything beyond what has been explicitly asked.
module Parse where
import Data.Void
import Highlight (highlightError)
import Text.Megaparsec
import Text.Megaparsec.Char
import Type
import qualified Data.List.NonEmpty as NE
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = Parsec Void String
-- Space consumer
sc :: Parser ()
sc = L.space space1 (L.skipLineComment "--") (L.skipBlockComment "{-" "-}")
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
symbol :: String -> Parser String
symbol = L.symbol sc
parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")
angles :: Parser a -> Parser a
angles = between (symbol "<") (symbol ">")
braces :: Parser a -> Parser a
braces = between (symbol "{") (symbol "}")
brackets :: Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")
identifier :: Parser String
identifier = lexeme $ do
head <- letterChar
tail <- many alphaNumChar
return (head : tail)
-- Main term parser
parseTerm :: Parser Term
parseTerm = do
term <- parseTermIni
parseTermEnd term
-- Initial term parser for non-operator constructs
parseTermIni :: Parser Term
parseTermIni = choice
[ parseVar
, parseSet
, parseEmp
, parseEfq
, parseUni
, parseOne
, parseNat
, parseZer
, parseNil
, try parseApp
, try parseLam
, try parseAll
, try parseSig
, try parseSwi
, try parseUse
, try parseGet
, try parseMat
, try parseAnn
, try parseLet
, try parseSuc
, try parseLst
]
-- Term end parser for operators and continuations
parseTermEnd :: Term -> Parser Term
parseTermEnd term = choice
[ do
_ <- symbol ","
t <- parseTerm
return $ Tup term t
, do
_ <- symbol "<>"
t <- parseTerm
return $ Con term t
, do
_ <- symbol "->"
t <- parseTerm
return $ All term t
, do
_ <- symbol ":"
t <- parseTerm
return $ Chk term t
, return term
]
-- Individual parsers for terms
parseVar :: Parser Term
parseVar = do
name <- identifier
return $ Var name
parseSet :: Parser Term
parseSet = do
_ <- symbol "*"
return Set
parseEmp :: Parser Term
parseEmp = do
_ <- symbol "⊥"
return Emp
parseEfq :: Parser Term
parseEfq = do
_ <- symbol "λ{}"
return Efq
parseUni :: Parser Term
parseUni = do
_ <- symbol "⊤"
return Uni
parseOne :: Parser Term
parseOne = do
_ <- symbol "()"
return One
parseNat :: Parser Term
parseNat = do
_ <- symbol "ℕ"
return Nat
parseZer :: Parser Term
parseZer = do
_ <- symbol "0"
return Zer
parseNil :: Parser Term
parseNil = do
_ <- symbol "[]"
return Nil
parseSuc :: Parser Term
parseSuc = do
_ <- symbol "+"
n <- parseTerm
return $ Suc n
parseApp :: Parser Term
parseApp = parens $ do
f <- parseTerm
xs <- many parseTerm
return $ foldl App f xs
-- Parse lambda abstractions: λx.body
parseLam :: Parser Term
parseLam = do
_ <- symbol "λ" <|> symbol ""
k <- identifier
_ <- symbol "."
body <- parseTerm
return $ Lam k (\v -> body)
-- Parse universal quantifiers: Π(A).B or Pi(A).B
parseAll :: Parser Term
parseAll = do
_ <- symbol "Π" <|> symbol "Pi"
a <- parseTerm
_ <- symbol "."
b <- parseTerm
return $ All a b
-- Parse sigma types: Σ(A).B or Sigma(A).B
parseSig :: Parser Term
parseSig = do
_ <- symbol "Σ" <|> symbol "Sigma"
a <- parseTerm
_ <- symbol "."
b <- parseTerm
return $ Sig a b
-- Parse nat switch: λ{0:z;+:s}
parseSwi :: Parser Term
parseSwi = do
_ <- symbol "λ{"
_ <- symbol "0:"
z <- parseTerm
_ <- symbol ";+:"
s <- parseTerm
_ <- symbol "}"
return $ Swi z s
-- Parse unit eliminator: λ{():f}
parseUse :: Parser Term
parseUse = do
_ <- symbol "λ{():"
f <- parseTerm
_ <- symbol "}"
return $ Use f
-- Parse tuple eliminator: λ{(,):f}
parseGet :: Parser Term
parseGet = do
_ <- symbol "λ{(,):"
f <- parseTerm
_ <- symbol "}"
return $ Get f
-- Parse list eliminator: λ{[]:n;<>:c}
parseMat :: Parser Term
parseMat = do
_ <- symbol "λ{[]:"
n <- parseTerm
_ <- symbol ";<>:"
c <- parseTerm
_ <- symbol "}"
return $ Mat n c
-- Parse annotations: <x:T>
parseAnn :: Parser Term
parseAnn = angles $ do
x <- parseTerm
_ <- symbol ":"
t <- parseTerm
return $ Ann x t
-- Parse let expressions: !v;f
parseLet :: Parser Term
parseLet = do
_ <- symbol "!"
v <- parseTerm
_ <- symbol ";"
f <- parseTerm
return $ Let v f
-- Parse list types: T[]
parseLst :: Parser Term
parseLst = do
t <- parseTermIni -- Use parseTermIni to avoid consuming operators
_ <- symbol "[]"
return $ Lst t
-- Parse from a string with error handling
parseString :: String -> String -> Either String Term
parseString filename input =
case parse parseTerm filename input of
Left err -> Left $ formatError input err
Right res -> Right res
-- Format error messages nicely
formatError :: String -> ParseErrorBundle String Void -> String
formatError input bundle =
let (err, pos) = NE.head $ fst $ attachSourcePos errorOffset (bundleErrors bundle) (bundlePosState bundle)
lin = unPos (sourceLine pos)
col = unPos (sourceColumn pos)
msg = parseErrorTextPretty err
highlighted = highlightError (lin, col) (lin, col + 1) input
in "\nPARSE_ERROR\n" ++
"- Expected: " ++ msg ++ "\n" ++
"- At position: line " ++ show lin ++ ", column " ++ show col ++ "\n" ++
highlighted
-- seems like the parser above is hanging when we try to parse "".
-- why?
7 hours ago
Favicon for openai
GPT OSS 120B | Cerebras
Reasoning (thought for 3.4s)
Short answer
The parser hangs on "" because parseLst is left‑recursive: it calls parseTermIni (which again contains parseLst) without consuming any input. When the input is empty the call chain
parseTermIni → parseLst → parseTermIni → parseLst → …
loops forever, so the top‑level parse parseTerm … never returns.
Why this happens in the current code
-- Parse list types: T[]
parseLst :: Parser Term
parseLst = do
t <- parseTermIni -- ← calls the same entry point again
_ <- symbol "[]"
return $ Lst t
parseLst lives in the choice list of parseTermIni.
Its first action (t <- parseTermIni) immediately invokes the same parser that we are already trying to decide between.
If the input does not start with something that parseTermIni can consume (e.g. the empty string), parseTermIni fails without consuming any characters.
Because the failure is non‑consuming, the try combinator is not used around parseLst; the choice combinator therefore retries the same alternative again, leading to an infinite recursion.
Megaparsec’s back‑tracking machinery will keep trying the same alternative over and over, which looks like the parser “hangs”.
How to fix it (minimal, still “simple and idiomatic”)
The usual way to break left recursion is to force the parser to consume something before recursing. For the list‑type syntax we know that a terminating [] must follow a non‑operator term, so we can:
Parse the inner term with a parser that does not contain parseLst (for instance parseTermIni but without parseLst in the alternatives), or
Use a try guard that checks that the inner term actually consumes input before recursing.
Below is a tiny patch that follows the first approach – it introduces a helper parseTermNoLst that is identical to parseTermIni except that it omits the parseLst alternative. The rest of the file stays untouched.
-- Helper: same as parseTermIni but without the list‑type case.
parseTermNoLst :: Parser Term
parseTermNoLst = choice
[ parseVar
, parseSet
, parseEmp
, parseEfq
, parseUni
, parseOne
, parseNat
, parseZer
, parseNil
, try parseApp
, try parseLam
, try parseAll
, try parseSig
, try parseSwi
, try parseUse
, try parseGet
, try parseMat
, try parseAnn
, try parseLet
, try parseSuc
-- note: deliberately *no* parseLst here
]
-- Updated list‑type parser: now it uses the non‑recursive helper.
parseLst :: Parser Term
parseLst = do
t <- parseTermNoLst -- guarantees progress before we look for "[]"
_ <- symbol "[]"
return $ Lst t
Because parseTermNoLst cannot call parseLst, the recursion is eliminated and the parser returns a proper error (or a result) on the empty string instead of looping.
Additional polishing (optional)
You don’t have to adopt the helper if you prefer a try‑based guard, e.g.:
parseLst = try $ do
t <- parseTermIni
notFollowedBy (symbol "[]") <?> "non‑empty term before list brackets"
_ <- symbol "[]"
return $ Lst t
Both solutions satisfy the original requirements:
keep the implementation simple and idiomatic,
avoid any “inventive” parsing tricks,
preserve the nice error‑message infrastructure you already have.
Bottom line: the hang is caused by left recursion introduced by parseLst. Replace the direct call to parseTermIni with a version that cannot immediately recurse (or guard it with try/notFollowedBy). After that the parser will correctly report an error on "" instead of looping.
7 hours ago
Favicon for moonshotai
Kimi K2
The parser hangs on empty input because:
parseTermIni is a choice among many alternatives
that choice is called by parseTermEnd ...
and parseTermEnd itself is part of the RHS of every
operator rule (, <> -> :), which finish by immediately
calling parseTerm again.
So with the empty string no alternative in parseTermIni
succeeds: the first guard (<|>) keeps the parser in the
back-tracking state while nothing consumes any input.
With an infinite stream of empty choices, Megaparsec keeps
retrying and never returns ⇒ observable “hang”.
Simple, idiomatic fix: make the top-level rule
fail quickly when the input is empty.
parseString :: String -> String -> Either String Term
parseString filename input =
if all (`elem` " \t\n\r") input -- nothing but whitespace
then Left "PARSE_ERROR\n- Expected: non-empty program"
else case parse (sc *> parseTerm <* eof) filename input of
Left err -> Left $ formatError input err
Right res -> Right res
That is the whole change – it keeps everything else intact
and stops an empty input from looping forever.
7 hours ago
Favicon for qwen
Qwen3 Coder
The parser hangs when trying to parse an empty string ("") because the parseTerm parser (and specifically the sub-parsers like parseVar, parseApp, etc.) do not consume any input when they fail — and more importantly, some of these parsers succeed without consuming input.
Let’s look at a few cases:
🔍 Problem Origins
Empty Input Matched by identifier:
The parseVar parser tries to parse an identifier.
An identifier is defined as:
identifier = lexeme $ do
head <- letterChar
tail <- many alphaNumChar
return (head : tail)
This will fail immediately on an empty input because there's no letterChar.
try Parsers in parseTermIni:
Many parsers are wrapped with try, so if they fail, the parser backtracks.
But if any parser partially consumes or doesn’t advance, and then fails, it might get stuck in a loop or infinite backtrack.
No Base Case / Default Clause:
If you reach the end of the input or provide an empty string, none of the parsers match properly and you get a failure—but since the failure isn’t caused by unexpected input, but rather the lack thereof, this can lead to hanging or very unclear diagnostics.
parseApp with Empty Parens:
The parseApp = parens $ do f <- parseTerm; xs <- many parseTerm; ... assumes there is at least a function term inside. However, when parsing "", it never even reaches past symbol "(".
✅ How to Fix It
Add a base case that explicitly rejects an empty input. One safe and idiomatic way is to add a guard or enforce the parser must make progress. For example, we can use Megaparsec's built-in eof handling or throw a custom message:
🔧 Solution: Add a guard for eof rejection
At the start of your top-level parseTerm, you should check for unexpected EOF, especially if you're expecting something non-empty.
Update the top-level entry point like this:
-- Top-level parser; rejects empty input
parseTermTop :: Parser Term
parseTermTop = do
sc
res <- parseTerm
eof
return res
Then update your final parseString to use this new top-level parser:
parseString :: String -> String -> Either String Term
parseString filename input =
case parse parseTermTop filename input of
Left err -> Left $ formatError input err
Right res -> Right res
And add the import for eof if not present:
import Text.Megaparsec (eof)
🧪 Test Case
Try parsing "" now:
parseString "test" ""
-- Should now yield a proper error like:
-- PARSE_ERROR
-- - Expected: term
-- - At position: line 1, column 1
-- ...
⚠️ Also Validate: Non-Empty identifier
Make sure your lexer also explicitly rejects unexpected inputs. You can do additional checks or rely on standard behavior.
Currently, identifier expects a letter at the beginning — which works correctly for rejecting blank input — but the broader issue was about progress-checking and termination.
✅ Final Tip
If you ever write recursive descent parsers manually, always make sure:
Every branch advances the input cursor unless it is a clear base case.
Failures are robustly handled (with good error positions).
You test with extreme edge cases like: "", " ", "(" — these often trip up parsers.
Let me know if you want help adding automatic indentation sensitivity or line/column reporting!
7 hours ago
Favicon for qwen
Qwen3 235B A22B Thinking 2507
Reasoning (thought for 112.5s)
After reviewing the provided parser, the issue causing the hang on empty input stems from calling highlightError with invalid positions when the input string is empty. The error position (1, 1) is out of bounds for an empty string, causing highlightError to hang (likely due to internal bounds-checking issues). Here's the minimal, idiomatic fix:
module Parse where
import Data.Void
import Highlight (highlightError)
import Text.Megaparsec
import Text.Megaparsec.Char
import Type
import qualified Data.List.NonEmpty as NE
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = Parsec Void String
-- Space consumer
sc :: Parser ()
sc = L.space space1 (L.skipLineComment "--") (L.skipBlockComment "{-" "-}")
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
symbol :: String -> Parser String
symbol = L.symbol sc
parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")
angles :: Parser a -> Parser a
angles = between (symbol "<") (symbol ">")
braces :: Parser a -> Parser a
braces = between (symbol "{") (symbol "}")
brackets :: Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")
identifier :: Parser String
identifier = lexeme $ do
head <- letterChar
tail <- many alphaNumChar
return (head : tail)
-- Main term parser
parseTerm :: Parser Term
parseTerm = do
term <- parseTermIni
parseTermEnd term
-- Initial term parser for non-operator constructs
parseTermIni :: Parser Term
parseTermIni = choice
[ parseVar
, parseSet
, parseEmp
, parseEfq
, parseUni
, parseOne
, parseNat
, parseZer
, parseNil
, try parseApp
, try parseLam
, try parseAll
, try parseSig
, try parseSwi
, try parseUse
, try parseGet
, try parseMat
, try parseAnn
, try parseLet
, try parseSuc
, try parseLst
]
-- Term end parser for operators and continuations
parseTermEnd :: Term -> Parser Term
parseTermEnd term = choice
[ do
_ <- symbol ","
t <- parseTerm
return $ Tup term t
, do
_ <- symbol "<>"
t <- parseTerm
return $ Con term t
, do
_ <- symbol "->"
t <- parseTerm
return $ All term t
, do
_ <- symbol ":"
t <- parseTerm
return $ Chk term t
, return term
]
-- Individual parsers for terms
parseVar :: Parser Term
parseVar = do
name <- identifier
return $ Var name
parseSet :: Parser Term
parseSet = do
_ <- symbol "*"
return Set
parseEmp :: Parser Term
parseEmp = do
_ <- symbol "⊥"
return Emp
parseEfq :: Parser Term
parseEfq = do
_ <- symbol "λ{}"
return Efq
parseUni :: Parser Term
parseUni = do
_ <- symbol "⊤"
return Uni
parseOne :: Parser Term
parseOne = do
_ <- symbol "()"
return One
parseNat :: Parser Term
parseNat = do
_ <- symbol "ℕ"
return Nat
parseZer :: Parser Term
parseZer = do
_ <- symbol "0"
return Zer
parseNil :: Parser Term
parseNil = do
_ <- symbol "[]"
return Nil
parseSuc :: Parser Term
parseSuc = do
_ <- symbol "+"
n <- parseTerm
return $ Suc n
parseApp :: Parser Term
parseApp = parens $ do
f <- parseTerm
xs <- many parseTerm
return $ foldl App f xs
-- Parse lambda abstractions: λx.body
parseLam :: Parser Term
parseLam = do
_ <- symbol "λ" <|> symbol "\\"
k <- identifier
_ <- symbol "."
body <- parseTerm
return $ Lam k (\v -> body)
-- Parse universal quantifiers: Π(A).B or Pi(A).B
parseAll :: Parser Term
parseAll = do
_ <- symbol "Π" <|> symbol "Pi"
a <- parseTerm
_ <- symbol "."
b <- parseTerm
return $ All a b
-- Parse sigma types: Σ(A).B or Sigma(A).B
parseSig :: Parser Term
parseSig = do
_ <- symbol "Σ" <|> symbol "Sigma"
a <- parseTerm
_ <- symbol "."
b <- parseTerm
return $ Sig a b
-- Parse nat switch: λ{0:z;+:s}
parseSwi :: Parser Term
parseSwi = do
_ <- symbol "λ{"
_ <- symbol "0:"
z <- parseTerm
_ <- symbol ";+:"
s <- parseTerm
_ <- symbol "}"
return $ Swi z s
-- Parse unit eliminator: λ{():f}
parseUse :: Parser Term
parseUse = do
_ <- symbol "λ{():"
f <- parseTerm
_ <- symbol "}"
return $ Use f
-- Parse tuple eliminator: λ{(,):f}
parseGet :: Parser Term
parseGet = do
_ <- symbol "λ{(,):"
f <- parseTerm
_ <- symbol "}"
return $ Get f
-- Parse list eliminator: λ{[]:n;<>:c}
parseMat :: Parser Term
parseMat = do
_ <- symbol "λ{[]:"
n <- parseTerm
_ <- symbol ";<>:"
c <- parseTerm
_ <- symbol "}"
return $ Mat n c
-- Parse annotations: <x:T>
parseAnn :: Parser Term
parseAnn = angles $ do
x <- parseTerm
_ <- symbol ":"
t <- parseTerm
return $ Ann x t
-- Parse let expressions: !v;f
parseLet :: Parser Term
parseLet = do
_ <- symbol "!"
v <- parseTerm
_ <- symbol ";"
f <- parseTerm
return $ Let v f
-- Parse list types: T[]
parseLst :: Parser Term
parseLst = do
t <- parseTermIni
_ <- symbol "[]"
return $ Lst t
-- Parse from a string with error handling
parseString :: String -> String -> Either String Term
parseString filename input =
case parse parseTerm filename input of
Left err -> Left $ formatError input err
Right res -> Right res
-- Format error messages nicely with empty-input guard
formatError :: String -> ParseErrorBundle String Void -> String
formatError input bundle =
if input == ""
then "\nPARSE_ERROR\n- Expected: a term\n- Detected: empty input\n- At position: line 1, column 1\nEMPTY INPUT\n"
else
let (err, pos) = NE.head $ fst $ attachSourcePos errorOffset (bundleErrors bundle) (bundlePosState bundle)
lin = unPos (sourceLine pos)
col = unPos (sourceColumn pos)
msg = parseErrorTextPretty err
highlighted = highlightError (lin, col) (lin, col + 1) input
in "\nPARSE_ERROR\n" ++
"- Expected: " ++ msg ++ "\n" ++
"- At position: line " ++ show lin ++ ", column " ++ show col ++ "\n" ++
highlighted
Key fixes:
Added empty input guard in formatError to avoid calling highlightError with invalid positions when input is empty
Corrected lambda symbol from "\ to "\\" (proper Haskell string escape for backslash)
Preserved idiomatic structure without adding complexity
Error message specifically addresses empty input with clear position information
This solution:
Fixes the hang by sanitizing input before calling highlightError
Maintains all specified parsing requirements (right-associative operators, application syntax, etc.)
Provides precise error messages by short-circuiting empty input cases
Adheres strictly to the "don't be inventive" requirement by only changing the minimal necessary code
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment