VictorTaelin · August 6, 2025 00:37
diff --git a/gistfile1.txt b/gistfile1.txt
 -- TASK: create a Parser for Kind7.
 -- - don't be inventive, just use a simple approach that is guaranteed to work
 -- - keep it simple and idiomatic
 -- - include nice error messages

 -- On the 'parseApp parser:
 -- - support (f x y z) as an alias to (((f x) y) z)
 -- - do so by parsing '(', then terms until you hit a ')', then foldl to build the App

 -- Note on operators:
 -- - syntaxes like 'a,b', 'a<>b', 'a->b', 'a:b', etc., MUST be treated as operators
 -- - there is no precedence, and operators always fold right: a,b,c,d → a,(b,(c,d))
 -- - implemented his using as imple logic:
 -- - the term parser is split in two: parseTermIni and parseTermEnd
 -- - the parseTermIni handles all core syntaxes (i.e., non-operators)
 -- - the parseTermEnd receives the Term parsed by parseTermIni, and attempts to parse an operator
 -- - this allows fast parsing with less backtracking and better error messages

 -- Note on error messages:
 -- - it should be inspired (but not identical) on this old error implementation:
 -- import Highlight (highlightError, highlight)
 -- showParseError :: String -> String -> P.ParseError -> IO ()
 -- showParseError filename input err = do
 -- let pos = errorPos err
 -- let lin = sourceLine pos
 -- let col = sourceColumn pos
 -- let msg = extractExpectedTokens err
 -- putStrLn $ setSGRCode [SetConsoleIntensity BoldIntensity] ++ "\nPARSE_ERROR" ++ setSGRCode [Reset]
 -- putStrLn $ "- expected: " ++ msg
 -- putStrLn $ "- detected:"
 -- putStrLn $ highlightError (lin, col) (lin, col + 1) input
 -- putStrLn $ setSGRCode [SetUnderlining SingleUnderline] ++ filename ++
 -- setSGRCode [Reset] ++ " " ++ show lin ++ ":" ++ show col
 -- simplify and adapt it to your own implementation
 -- use the Highlight import. IT ALREADY EXISTS, DO NOT RE-CREATE IT.

 -- About lambdas:
 -- - do NOT handle shadowing or substitution in any way
 -- - just parse the input strings into the terms
 -- - this is handled elsewhre - separation of concerns

 -- remember:
 -- - keep this file as simple, elegant and idiomatic.
 -- - do not write anything beyond what has been explicitly asked.

 module Parse where

 import Data.Void
 import Highlight (highlightError)
 import Text.Megaparsec
 import Text.Megaparsec.Char
 import Type
 import qualified Data.List.NonEmpty as NE
 import qualified Text.Megaparsec.Char.Lexer as L

 type Parser = Parsec Void String

 -- Space consumer
 sc :: Parser ()
 sc = L.space space1 (L.skipLineComment "--") (L.skipBlockComment "{-" "-}")

 lexeme :: Parser a -> Parser a
 lexeme = L.lexeme sc

 symbol :: String -> Parser String
 symbol = L.symbol sc

 parens :: Parser a -> Parser a
 parens = between (symbol "(") (symbol ")")

 angles :: Parser a -> Parser a
 angles = between (symbol "<") (symbol ">")

 braces :: Parser a -> Parser a
 braces = between (symbol "{") (symbol "}")

 brackets :: Parser a -> Parser a
 brackets = between (symbol "[") (symbol "]")

 identifier :: Parser String
 identifier = lexeme $ do
 head <- letterChar
 tail <- many alphaNumChar
 return (head : tail)

 -- Main term parser
 parseTerm :: Parser Term
 parseTerm = do
 term <- parseTermIni
 parseTermEnd term

 -- Initial term parser for non-operator constructs
 parseTermIni :: Parser Term
 parseTermIni = choice
 [ parseVar
 , parseSet
 , parseEmp
 , parseEfq
 , parseUni
 , parseOne
 , parseNat
 , parseZer
 , parseNil
 , try parseApp
 , try parseLam
 , try parseAll
 , try parseSig
 , try parseSwi
 , try parseUse
 , try parseGet
 , try parseMat
 , try parseAnn
 , try parseLet
 , try parseSuc
 , try parseLst
 ]

 -- Term end parser for operators and continuations
 parseTermEnd :: Term -> Parser Term
 parseTermEnd term = choice
 [ do
 _ <- symbol ","
 t <- parseTerm
 return $ Tup term t
 , do
 _ <- symbol "<>"
 t <- parseTerm
 return $ Con term t
 , do
 _ <- symbol "->"
 t <- parseTerm
 return $ All term t
 , do
 _ <- symbol ":"
 t <- parseTerm
 return $ Chk term t
 , return term
 ]

 -- Individual parsers for terms
 parseVar :: Parser Term
 parseVar = do
 name <- identifier
 return $ Var name

 parseSet :: Parser Term
 parseSet = do
 _ <- symbol "*"
 return Set

 parseEmp :: Parser Term
 parseEmp = do
 _ <- symbol "⊥"
 return Emp

 parseEfq :: Parser Term
 parseEfq = do
 _ <- symbol "λ{}"
 return Efq

 parseUni :: Parser Term
 parseUni = do
 _ <- symbol "⊤"
 return Uni

 parseOne :: Parser Term
 parseOne = do
 _ <- symbol "()"
 return One

 parseNat :: Parser Term
 parseNat = do
 _ <- symbol "ℕ"
 return Nat

 parseZer :: Parser Term
 parseZer = do
 _ <- symbol "0"
 return Zer

 parseNil :: Parser Term
 parseNil = do
 _ <- symbol "[]"
 return Nil

 parseSuc :: Parser Term
 parseSuc = do
 _ <- symbol "+"
 n <- parseTerm
 return $ Suc n

 parseApp :: Parser Term
 parseApp = parens $ do
 f <- parseTerm
 xs <- many parseTerm
 return $ foldl App f xs

 -- Parse lambda abstractions: λx.body
 parseLam :: Parser Term
 parseLam = do
 _ <- symbol "λ" <|> symbol ""
 k <- identifier
 _ <- symbol "."
 body <- parseTerm
 return $ Lam k (\v -> body)

 -- Parse universal quantifiers: Π(A).B or Pi(A).B
 parseAll :: Parser Term
 parseAll = do
 _ <- symbol "Π" <|> symbol "Pi"
 a <- parseTerm
 _ <- symbol "."
 b <- parseTerm
 return $ All a b

 -- Parse sigma types: Σ(A).B or Sigma(A).B
 parseSig :: Parser Term
 parseSig = do
 _ <- symbol "Σ" <|> symbol "Sigma"
 a <- parseTerm
 _ <- symbol "."
 b <- parseTerm
 return $ Sig a b

 -- Parse nat switch: λ{0:z;+:s}
 parseSwi :: Parser Term
 parseSwi = do
 _ <- symbol "λ{"
 _ <- symbol "0:"
 z <- parseTerm
 _ <- symbol ";+:"
 s <- parseTerm
 _ <- symbol "}"
 return $ Swi z s

 -- Parse unit eliminator: λ{():f}
 parseUse :: Parser Term
 parseUse = do
 _ <- symbol "λ{():"
 f <- parseTerm
 _ <- symbol "}"
 return $ Use f

 -- Parse tuple eliminator: λ{(,):f}
 parseGet :: Parser Term
 parseGet = do
 _ <- symbol "λ{(,):"
 f <- parseTerm
 _ <- symbol "}"
 return $ Get f

 -- Parse list eliminator: λ{[]:n;<>:c}
 parseMat :: Parser Term
 parseMat = do
 _ <- symbol "λ{[]:"
 n <- parseTerm
 _ <- symbol ";<>:"
 c <- parseTerm
 _ <- symbol "}"
 return $ Mat n c

 -- Parse annotations: <x:T>
 parseAnn :: Parser Term
 parseAnn = angles $ do
 x <- parseTerm
 _ <- symbol ":"
 t <- parseTerm
 return $ Ann x t

 -- Parse let expressions: !v;f
 parseLet :: Parser Term
 parseLet = do
 _ <- symbol "!"
 v <- parseTerm
 _ <- symbol ";"
 f <- parseTerm
 return $ Let v f

 -- Parse list types: T[]
 parseLst :: Parser Term
 parseLst = do
 t <- parseTermIni -- Use parseTermIni to avoid consuming operators
 _ <- symbol "[]"
 return $ Lst t

 -- Parse from a string with error handling
 parseString :: String -> String -> Either String Term
 parseString filename input =
 case parse parseTerm filename input of
 Left err -> Left $ formatError input err
 Right res -> Right res

 -- Format error messages nicely
 formatError :: String -> ParseErrorBundle String Void -> String
 formatError input bundle =
 let (err, pos) = NE.head $ fst $ attachSourcePos errorOffset (bundleErrors bundle) (bundlePosState bundle)
 lin = unPos (sourceLine pos)
 col = unPos (sourceColumn pos)
 msg = parseErrorTextPretty err
 highlighted = highlightError (lin, col) (lin, col + 1) input
 in "\nPARSE_ERROR\n" ++
 "- Expected: " ++ msg ++ "\n" ++
 "- At position: line " ++ show lin ++ ", column " ++ show col ++ "\n" ++
 highlighted

 -- seems like the parser above is hanging when we try to parse "".
 -- why?






 7 hours ago
 Favicon for openai
 GPT OSS 120B | Cerebras
 Reasoning (thought for 3.4s)
 Short answer

 The parser hangs on "" because parseLst is left‑recursive: it calls parseTermIni (which again contains parseLst) without consuming any input. When the input is empty the call chain


 parseTermIni → parseLst → parseTermIni → parseLst → …
 loops forever, so the top‑level parse parseTerm … never returns.

 Why this happens in the current code

 -- Parse list types: T[]
 parseLst :: Parser Term
 parseLst = do
  t <- parseTermIni        -- ← calls the same entry point again
  _ <- symbol "[]"
  return $ Lst t
 parseLst lives in the choice list of parseTermIni.
 Its first action (t <- parseTermIni) immediately invokes the same parser that we are already trying to decide between.
 If the input does not start with something that parseTermIni can consume (e.g. the empty string), parseTermIni fails without consuming any characters.
 Because the failure is non‑consuming, the try combinator is not used around parseLst; the choice combinator therefore retries the same alternative again, leading to an infinite recursion.
 Megaparsec’s back‑tracking machinery will keep trying the same alternative over and over, which looks like the parser “hangs”.

 How to fix it (minimal, still “simple and idiomatic”)
 The usual way to break left recursion is to force the parser to consume something before recursing. For the list‑type syntax we know that a terminating [] must follow a non‑operator term, so we can:

 Parse the inner term with a parser that does not contain parseLst (for instance parseTermIni but without parseLst in the alternatives), or
 Use a try guard that checks that the inner term actually consumes input before recursing.
 Below is a tiny patch that follows the first approach – it introduces a helper parseTermNoLst that is identical to parseTermIni except that it omits the parseLst alternative. The rest of the file stays untouched.


 -- Helper: same as parseTermIni but without the list‑type case.
 parseTermNoLst :: Parser Term
 parseTermNoLst = choice
  [ parseVar
  , parseSet
  , parseEmp
  , parseEfq
  , parseUni
  , parseOne
  , parseNat
  , parseZer
  , parseNil
  , try parseApp
  , try parseLam
  , try parseAll
  , try parseSig
  , try parseSwi
  , try parseUse
  , try parseGet
  , try parseMat
  , try parseAnn
  , try parseLet
  , try parseSuc
  -- note: deliberately *no* parseLst here
  ]

 -- Updated list‑type parser: now it uses the non‑recursive helper.
 parseLst :: Parser Term
 parseLst = do
  t <- parseTermNoLst      -- guarantees progress before we look for "[]"
  _ <- symbol "[]"
  return $ Lst t
 Because parseTermNoLst cannot call parseLst, the recursion is eliminated and the parser returns a proper error (or a result) on the empty string instead of looping.

 Additional polishing (optional)
 You don’t have to adopt the helper if you prefer a try‑based guard, e.g.:


 parseLst = try $ do
  t <- parseTermIni
  notFollowedBy (symbol "[]") <?> "non‑empty term before list brackets"
  _ <- symbol "[]"
  return $ Lst t
 Both solutions satisfy the original requirements:

 keep the implementation simple and idiomatic,
 avoid any “inventive” parsing tricks,
 preserve the nice error‑message infrastructure you already have.
 Bottom line: the hang is caused by left recursion introduced by parseLst. Replace the direct call to parseTermIni with a version that cannot immediately recurse (or guard it with try/notFollowedBy). After that the parser will correctly report an error on "" instead of looping.







 7 hours ago
 Favicon for moonshotai
 Kimi K2
 The parser hangs on empty input because:

 parseTermIni is a choice among many alternatives
 that choice is called by parseTermEnd ...
 and parseTermEnd itself is part of the RHS of every
 operator rule (, <> -> :), which finish by immediately
 calling parseTerm again.
 So with the empty string no alternative in parseTermIni
 succeeds: the first guard (<|>) keeps the parser in the
 back-tracking state while nothing consumes any input.
 With an infinite stream of empty choices, Megaparsec keeps
 retrying and never returns ⇒ observable “hang”.

 Simple, idiomatic fix: make the top-level rule
 fail quickly when the input is empty.


 parseString :: String -> String -> Either String Term
 parseString filename input =
  if all (`elem` " \t\n\r") input      -- nothing but whitespace
  then Left "PARSE_ERROR\n- Expected: non-empty program"
  else case parse (sc *> parseTerm <* eof) filename input of
         Left err  -> Left $ formatError input err
         Right res -> Right res
 That is the whole change – it keeps everything else intact
 and stops an empty input from looping forever.







 7 hours ago
 Favicon for qwen
 Qwen3 Coder
 The parser hangs when trying to parse an empty string ("") because the parseTerm parser (and specifically the sub-parsers like parseVar, parseApp, etc.) do not consume any input when they fail — and more importantly, some of these parsers succeed without consuming input.

 Let’s look at a few cases:

 🔍 Problem Origins
 Empty Input Matched by identifier:

 The parseVar parser tries to parse an identifier.
 An identifier is defined as:

 identifier = lexeme $ do
  head <- letterChar
  tail <- many alphaNumChar
  return (head : tail)
 This will fail immediately on an empty input because there's no letterChar.
 try Parsers in parseTermIni:

 Many parsers are wrapped with try, so if they fail, the parser backtracks.
 But if any parser partially consumes or doesn’t advance, and then fails, it might get stuck in a loop or infinite backtrack.
 No Base Case / Default Clause:

 If you reach the end of the input or provide an empty string, none of the parsers match properly and you get a failure—but since the failure isn’t caused by unexpected input, but rather the lack thereof, this can lead to hanging or very unclear diagnostics.
 parseApp with Empty Parens:

 The parseApp = parens $ do f <- parseTerm; xs <- many parseTerm; ... assumes there is at least a function term inside. However, when parsing "", it never even reaches past symbol "(".
 ✅ How to Fix It
 Add a base case that explicitly rejects an empty input. One safe and idiomatic way is to add a guard or enforce the parser must make progress. For example, we can use Megaparsec's built-in eof handling or throw a custom message:

 🔧 Solution: Add a guard for eof rejection
 At the start of your top-level parseTerm, you should check for unexpected EOF, especially if you're expecting something non-empty.

 Update the top-level entry point like this:


 -- Top-level parser; rejects empty input
 parseTermTop :: Parser Term
 parseTermTop = do
  sc
  res <- parseTerm
  eof
  return res
 Then update your final parseString to use this new top-level parser:


 parseString :: String -> String -> Either String Term
 parseString filename input =
  case parse parseTermTop filename input of
    Left err -> Left $ formatError input err
    Right res -> Right res
 And add the import for eof if not present:


 import Text.Megaparsec (eof)
 🧪 Test Case
 Try parsing "" now:


 parseString "test" ""
 -- Should now yield a proper error like:
 -- PARSE_ERROR
 -- - Expected: term
 -- - At position: line 1, column 1
 -- ...
 ⚠️ Also Validate: Non-Empty identifier
 Make sure your lexer also explicitly rejects unexpected inputs. You can do additional checks or rely on standard behavior.

 Currently, identifier expects a letter at the beginning — which works correctly for rejecting blank input — but the broader issue was about progress-checking and termination.

 ✅ Final Tip
 If you ever write recursive descent parsers manually, always make sure:

 Every branch advances the input cursor unless it is a clear base case.
 Failures are robustly handled (with good error positions).
 You test with extreme edge cases like: "", " ", "(" — these often trip up parsers.
 Let me know if you want help adding automatic indentation sensitivity or line/column reporting!







 7 hours ago
 Favicon for qwen
 Qwen3 235B A22B Thinking 2507
 Reasoning (thought for 112.5s)
 After reviewing the provided parser, the issue causing the hang on empty input stems from calling highlightError with invalid positions when the input string is empty. The error position (1, 1) is out of bounds for an empty string, causing highlightError to hang (likely due to internal bounds-checking issues). Here's the minimal, idiomatic fix:


 module Parse where

 import Data.Void
 import Highlight (highlightError)
 import Text.Megaparsec
 import Text.Megaparsec.Char
 import Type
 import qualified Data.List.NonEmpty as NE
 import qualified Text.Megaparsec.Char.Lexer as L

 type Parser = Parsec Void String

 -- Space consumer
 sc :: Parser ()
 sc = L.space space1 (L.skipLineComment "--") (L.skipBlockComment "{-" "-}")

 lexeme :: Parser a -> Parser a
 lexeme = L.lexeme sc

 symbol :: String -> Parser String
 symbol = L.symbol sc

 parens :: Parser a -> Parser a
 parens = between (symbol "(") (symbol ")")

 angles :: Parser a -> Parser a
 angles = between (symbol "<") (symbol ">")

 braces :: Parser a -> Parser a
 braces = between (symbol "{") (symbol "}")

 brackets :: Parser a -> Parser a
 brackets = between (symbol "[") (symbol "]")

 identifier :: Parser String
 identifier = lexeme $ do
  head <- letterChar
  tail <- many alphaNumChar
  return (head : tail)

 -- Main term parser
 parseTerm :: Parser Term
 parseTerm = do
  term <- parseTermIni
  parseTermEnd term

 -- Initial term parser for non-operator constructs
 parseTermIni :: Parser Term
 parseTermIni = choice
  [ parseVar
  , parseSet
  , parseEmp
  , parseEfq
  , parseUni
  , parseOne
  , parseNat
  , parseZer
  , parseNil
  , try parseApp
  , try parseLam
  , try parseAll
  , try parseSig
  , try parseSwi
  , try parseUse
  , try parseGet
  , try parseMat
  , try parseAnn
  , try parseLet
  , try parseSuc
  , try parseLst
  ]

 -- Term end parser for operators and continuations
 parseTermEnd :: Term -> Parser Term
 parseTermEnd term = choice
  [ do
      _ <- symbol ","
      t <- parseTerm
      return $ Tup term t
  , do
      _ <- symbol "<>"
      t <- parseTerm
      return $ Con term t
  , do
      _ <- symbol "->"
      t <- parseTerm
      return $ All term t
  , do
      _ <- symbol ":"
      t <- parseTerm
      return $ Chk term t
  , return term
  ]

 -- Individual parsers for terms
 parseVar :: Parser Term
 parseVar = do
  name <- identifier
  return $ Var name

 parseSet :: Parser Term
 parseSet = do
  _ <- symbol "*"
  return Set

 parseEmp :: Parser Term
 parseEmp = do
  _ <- symbol "⊥"
  return Emp

 parseEfq :: Parser Term
 parseEfq = do
  _ <- symbol "λ{}"
  return Efq

 parseUni :: Parser Term
 parseUni = do
  _ <- symbol "⊤"
  return Uni

 parseOne :: Parser Term
 parseOne = do
  _ <- symbol "()"
  return One

 parseNat :: Parser Term
 parseNat = do
  _ <- symbol "ℕ"
  return Nat

 parseZer :: Parser Term
 parseZer = do
  _ <- symbol "0"
  return Zer

 parseNil :: Parser Term
 parseNil = do
  _ <- symbol "[]"
  return Nil

 parseSuc :: Parser Term
 parseSuc = do
  _ <- symbol "+"
  n <- parseTerm
  return $ Suc n

 parseApp :: Parser Term
 parseApp = parens $ do
  f <- parseTerm
  xs <- many parseTerm
  return $ foldl App f xs

 -- Parse lambda abstractions: λx.body
 parseLam :: Parser Term
 parseLam = do
  _ <- symbol "λ" <|> symbol "\\"
  k <- identifier
  _ <- symbol "."
  body <- parseTerm
  return $ Lam k (\v -> body)

 -- Parse universal quantifiers: Π(A).B or Pi(A).B
 parseAll :: Parser Term
 parseAll = do
  _ <- symbol "Π" <|> symbol "Pi"
  a <- parseTerm
  _ <- symbol "."
  b <- parseTerm
  return $ All a b

 -- Parse sigma types: Σ(A).B or Sigma(A).B
 parseSig :: Parser Term
 parseSig = do
  _ <- symbol "Σ" <|> symbol "Sigma"
  a <- parseTerm
  _ <- symbol "."
  b <- parseTerm
  return $ Sig a b

 -- Parse nat switch: λ{0:z;+:s}
 parseSwi :: Parser Term
 parseSwi = do
  _ <- symbol "λ{"
  _ <- symbol "0:"
  z <- parseTerm
  _ <- symbol ";+:"
  s <- parseTerm
  _ <- symbol "}"
  return $ Swi z s

 -- Parse unit eliminator: λ{():f}
 parseUse :: Parser Term
 parseUse = do
  _ <- symbol "λ{():"
  f <- parseTerm
  _ <- symbol "}"
  return $ Use f

 -- Parse tuple eliminator: λ{(,):f}
 parseGet :: Parser Term
 parseGet = do
  _ <- symbol "λ{(,):"
  f <- parseTerm
  _ <- symbol "}"
  return $ Get f

 -- Parse list eliminator: λ{[]:n;<>:c}
 parseMat :: Parser Term
 parseMat = do
  _ <- symbol "λ{[]:"
  n <- parseTerm
  _ <- symbol ";<>:"
  c <- parseTerm
  _ <- symbol "}"
  return $ Mat n c

 -- Parse annotations: <x:T>
 parseAnn :: Parser Term
 parseAnn = angles $ do
  x <- parseTerm
  _ <- symbol ":"
  t <- parseTerm
  return $ Ann x t

 -- Parse let expressions: !v;f
 parseLet :: Parser Term
 parseLet = do
  _ <- symbol "!"
  v <- parseTerm
  _ <- symbol ";"
  f <- parseTerm
  return $ Let v f

 -- Parse list types: T[]
 parseLst :: Parser Term
 parseLst = do
  t <- parseTermIni
  _ <- symbol "[]"
  return $ Lst t

 -- Parse from a string with error handling
 parseString :: String -> String -> Either String Term
 parseString filename input =
  case parse parseTerm filename input of
    Left err -> Left $ formatError input err
    Right res -> Right res

 -- Format error messages nicely with empty-input guard
 formatError :: String -> ParseErrorBundle String Void -> String
 formatError input bundle =
  if input == ""
  then "\nPARSE_ERROR\n- Expected: a term\n- Detected: empty input\n- At position: line 1, column 1\nEMPTY INPUT\n"
  else
    let (err, pos) = NE.head $ fst $ attachSourcePos errorOffset (bundleErrors bundle) (bundlePosState bundle)
        lin = unPos (sourceLine pos)
        col = unPos (sourceColumn pos)
        msg = parseErrorTextPretty err
        highlighted = highlightError (lin, col) (lin, col + 1) input
    in "\nPARSE_ERROR\n" ++
       "- Expected: " ++ msg ++ "\n" ++
       "- At position: line " ++ show lin ++ ", column " ++ show col ++ "\n" ++
       highlighted
 Key fixes:

 Added empty input guard in formatError to avoid calling highlightError with invalid positions when input is empty
 Corrected lambda symbol from "\ to "\\" (proper Haskell string escape for backslash)
 Preserved idiomatic structure without adding complexity
 Error message specifically addresses empty input with clear position information
 This solution:

 Fixes the hang by sanitizing input before calling highlightError
 Maintains all specified parsing requirements (right-associative operators, application syntax, etc.)
 Provides precise error messages by short-circuiting empty input cases
 Adheres strictly to the "don't be inventive" requirement by only changing the minimal necessary code
No results found