Skip to content

Instantly share code, notes, and snippets.

@pedrominicz
Created July 27, 2022 16:23
Show Gist options
  • Save pedrominicz/f6f11b10b745492c49cd52cb3edca48e to your computer and use it in GitHub Desktop.
Save pedrominicz/f6f11b10b745492c49cd52cb3edca48e to your computer and use it in GitHub Desktop.
Alex: strict text
{
module Lex where
import Data.Bits
import Data.Char
import Data.Word
import Data.Text (Text)
import qualified Data.Text as T
}
tokens :-
-- A run of whitespace has no action attached, so the lexer skips it.
$white+ ;
-- A maximal run of non-whitespace characters forms a token; the action `id`
-- hands the matched text back unchanged.
[^ $white]+ { id }
{
-- | One byte of the encoded input, as consumed by the lexer.
type Byte = Word8
-- | Lexer input state: the bytes of the current character that have not been
-- handed out yet, paired with the text that has not been encoded yet.
type AlexInput = ([Byte], Text)
-- | Encode a single character as UTF-8.
--
-- The result is the leading byte paired with the (possibly empty) list of
-- continuation bytes, in the order they appear in the encoded stream.
encode :: Char -> (Byte, [Byte])
encode c = (fromIntegral lead, map fromIntegral trailing)
  where
    code :: Int
    code = ord c

    -- Continuation byte carrying the six payload bits that start at bit @n@.
    cont :: Int -> Int
    cont n = 0x80 + ((code `shiftR` n) .&. 0x3f)

    -- The width of the encoding is chosen from the size of the code point.
    lead :: Int
    trailing :: [Int]
    (lead, trailing)
      | code <= 0x7f = (code, [])
      | code <= 0x7ff = (0xc0 + (code `shiftR` 6), [cont 0])
      | code <= 0xffff = (0xe0 + (code `shiftR` 12), [cont 6, cont 0])
      | otherwise = (0xf0 + (code `shiftR` 18), [cont 12, cont 6, cont 0])
-- | Supply the lexer with its next input byte.
--
-- Bytes left over from the current character are handed out first. Once they
-- are exhausted, the next character is taken from the text and encoded; its
-- first byte is returned and the remaining ones are kept as pending. Returns
-- 'Nothing' when there are no pending bytes and the text is empty.
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (pending, remaining) =
  case pending of
    b : bs -> Just (b, (bs, remaining))
    [] -> do
      (ch, remaining') <- T.uncons remaining
      let (firstByte, others) = encode ch
      Just (firstByte, (others, remaining'))
-- | Run the lexer over the whole input.
--
-- Returns the tokens in input order, or 'Nothing' as soon as the lexer
-- reports an error.
scanner :: Text -> Maybe [Text]
scanner source = loop ([], source)
  where
    loop :: AlexInput -> Maybe [Text]
    loop current@(_, unread) =
      case alexScan current 0 of
        AlexEOF -> Just []
        AlexError _ -> Nothing
        AlexSkip next _ -> loop next
        -- The token text is the first @size@ characters of the text that was
        -- still unread when this scan started.
        AlexToken next size action ->
          (action (T.take size unread) :) <$> loop next
-- | Print a banner, read one line from standard input, and print the result
-- of lexing it.
main :: IO ()
main = do
  putStrLn "text"
  -- Read with the Prelude's `getLine` and pack the result, so that
  -- `Data.Text.IO` does not have to be imported when the lexer itself has no
  -- use for it.
  line <- getLine
  print (scanner (T.pack line))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment