Created
July 27, 2022 16:23
-
-
Save pedrominicz/f6f11b10b745492c49cd52cb3edca48e to your computer and use it in GitHub Desktop.
Alex: strict text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
module Lex where | |
import Data.Bits | |
import Data.Char | |
import Data.Word | |
import Data.Text (Text) | |
import qualified Data.Text as T | |
} | |
-- Lexical rules. Each rule pairs a regular expression with either `;`
-- (discard the match) or a Haskell action in braces.
tokens :-

  -- A run of whitespace produces no token.
  $white+       ;

  -- A maximal run of non-whitespace characters is one token. The action
  -- `id` is what `scanner` applies to the matched text, so the token is the
  -- text itself.
  [^ $white]+   { id }
{ | |
-- | A single byte of encoded input, as returned by 'alexGetByte'.
type Byte = Word8

-- | Scanner input state: the bytes of the most recently decoded character
-- that have not been handed out yet, paired with the text still to be read.
type AlexInput = ([Byte], Text)
-- | Encode one character as UTF-8.
--
-- Returns the lead byte together with the continuation bytes that follow it
-- (none for code points up to 0x7f, up to three for the largest ones).
encode :: Char -> (Byte, [Byte])
encode c
  | cp <= 0x7f   = (byte cp, [])
  | cp <= 0x7ff  = (byte (0xc0 + cp `shiftR` 6), [cont 0])
  | cp <= 0xffff = (byte (0xe0 + cp `shiftR` 12), [cont 6, cont 0])
  | otherwise    = (byte (0xf0 + cp `shiftR` 18), [cont 12, cont 6, cont 0])
  where
    -- Code point of the character being encoded.
    cp :: Int
    cp = ord c

    -- Narrow an already-in-range value to a byte.
    byte :: Int -> Byte
    byte = fromIntegral

    -- Continuation byte carrying the six bits of 'cp' that start at bit @n@.
    -- Precedence: `shiftR` (8) binds tighter than `.&.` (7), which binds
    -- tighter than `+` (6), so the mask is applied before adding 0x80.
    cont :: Int -> Byte
    cont n = byte (0x80 + (cp `shiftR` n) .&. 0x3f)
-- | Produce the next input byte for the scanner, or 'Nothing' at end of
-- input.
--
-- Bytes left over from the last decoded character are handed out first; once
-- they run out, the next character is taken off the text and encoded, its
-- lead byte is returned and its continuation bytes become the new pending
-- list.
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (pending, remaining) =
  case pending of
    b : bs -> Just (b, (bs, remaining))
    []     -> startChar <$> T.uncons remaining
  where
    startChar (c, remaining') =
      let (lead, continuation) = encode c
       in (lead, (continuation, remaining'))
-- | Split the input into tokens using the generated lexer.
--
-- Returns 'Nothing' as soon as 'alexScan' reports an error, otherwise the
-- list of token texts in input order.
scanner :: Text -> Maybe [Text]
scanner source = loop ([], source)
  where
    loop :: AlexInput -> Maybe [Text]
    loop current@(_, remaining) =
      case alexScan current 0 of
        AlexEOF -> Just []
        AlexError _ -> Nothing
        AlexSkip next _ -> loop next
        AlexToken next len action ->
          -- The token text is cut from the text as it stood *before* the
          -- match. NOTE(review): 'T.take' counts characters, so this assumes
          -- Alex reports `len` in characters rather than bytes -- confirm
          -- against the Alex version in use.
          (action (T.take len remaining) :) <$> loop next
-- | Print the line @text@, read one line from standard input, and print the
-- result of running 'scanner' on it.
main :: IO ()
main =
  putStrLn "text"
    -- Not using `T.getLine` because I don't want to import `Data.Text.IO`
    -- since the lexer doesn't use it.
    >> getLine
    >>= print . scanner . T.pack
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment