Last active
July 27, 2022 16:23
-
-
Save pedrominicz/aaf19e6fb4d46be97af73f5cf3605009 to your computer and use it in GitHub Desktop.
Alex: strict bytestring with user state
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
module Lazy where | |
import Data.ByteString (ByteString) | |
import qualified Data.ByteString.Lazy as B | |
} | |
%wrapper "monadUserState-bytestring" | |
$alpha = [A-Za-z] | |
$digit = [0-9] | |
@string = [$alpha _] [$alpha $digit _]* | |
tokens :- | |
<0> $white+ ; | |
<0> @string { string } | |
<0> "{-" { start } | |
<comment> "{-" { start } | |
<comment> "-}" { end } | |
<comment> [. \n] ; | |
{ | |
type AlexUserState = Int | |
alexInitUserState :: AlexUserState | |
alexInitUserState = 0 | |
increaseDepth :: Alex () | |
increaseDepth = do | |
alexSetStartCode comment | |
Alex $ \s -> Right (s { alex_ust = alex_ust s + 1 }, ()) | |
decreaseDepth :: Alex Int | |
decreaseDepth = | |
Alex $ \s -> Right (s { alex_ust = alex_ust s - 1 }, alex_ust s - 1) | |
alexEOF :: Alex (Maybe ByteString) | |
alexEOF = return Nothing | |
type Token = ByteString | |
start :: AlexAction (Maybe Token) | |
start input len = increaseDepth >> skip input len | |
end :: AlexAction (Maybe Token) | |
end input len = do | |
depth <- decreaseDepth | |
if depth == 0 | |
then alexSetStartCode 0 | |
else return () | |
skip input len | |
string :: AlexAction (Maybe Token) | |
string (_, _, str, _) len = do | |
-- Copy the `ByteString`, otherwise, it will prevent the input from being | |
-- garbage collected. See section 5.3.6 of the Alex User Guide. | |
return . Just . B.toStrict $ B.take len str | |
scanner :: ByteString -> Either String [Token] | |
scanner str = runAlex (B.fromStrict str) go | |
where | |
go :: Alex [Token] | |
go = do | |
token <- alexMonadScan | |
case token of | |
Just token -> (token :) <$> go | |
Nothing -> return [] | |
main :: IO () | |
main = do | |
putStrLn "Lazy" | |
str <- B.toStrict <$> B.getContents | |
print $ scanner str | |
-- Hack to silence unused import warning. | |
hack :: a | |
hack = undefined Data.Char.isAlpha | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
module Minimal where | |
import Control.Monad.Except | |
import Control.Monad.State.Strict | |
import Data.ByteString (ByteString) | |
import Data.Word (Word8) | |
import qualified Data.ByteString as B | |
} | |
$alpha = [A-Za-z] | |
$digit = [0-9] | |
@string = [$alpha _] [$alpha $digit _]* | |
tokens :- | |
<0> $white+ ; | |
<0> @string { string } | |
<0> "{-" { start } | |
<comment> "{-" { start } | |
<comment> "-}" { end } | |
<comment> [. \n] ; | |
{ | |
type Byte = Word8 | |
type AlexInput = (Byte, ByteString, Int) | |
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput) | |
alexGetByte (_, str, n) = | |
case B.uncons str of | |
Just (c, str) -> Just (c, (c, str, n + 1)) | |
Nothing -> Nothing | |
data AlexState = AlexState | |
{ input :: !AlexInput | |
, startcode :: !Int | |
, depth :: !Int | |
} | |
type Alex a = StateT AlexState (Except String) a | |
runAlex :: ByteString -> Alex a -> Either String a | |
runAlex str alex = runExcept $ evalStateT alex (AlexState (10, str, 0) 0 0) | |
type Token = ByteString | |
token :: Alex (Maybe Token) | |
token = do | |
input <- input <$> get | |
startcode <- startcode <$> get | |
case alexScan input startcode of | |
AlexEOF -> return Nothing | |
AlexError _ -> throwError "error" | |
AlexSkip input _ -> do | |
modify $ \s -> s { input = input } | |
token | |
AlexToken input' len action -> do | |
modify $ \s -> s { input = input' } | |
action input len | |
start :: AlexInput -> Int -> Alex (Maybe Token) | |
start _ _ = do | |
modify $ \s -> s { startcode = comment, depth = depth s + 1 } | |
token | |
end :: AlexInput -> Int -> Alex (Maybe Token) | |
end _ _ = do | |
modify $ \s -> s { depth = depth s - 1 } | |
depth <- depth <$> get | |
when (depth == 0) . modify $ \s -> s { startcode = 0 } | |
token | |
string :: AlexInput -> Int -> Alex (Maybe Token) | |
string (_, str, _) len = do | |
-- Copy the `ByteString`, otherwise, it will prevent the input from being | |
-- garbage collected. See section 5.3.6 of the Alex User Guide. | |
return . Just . B.copy $ B.take len str | |
scanner :: ByteString -> Either String [Token] | |
scanner str = runAlex str go | |
where | |
go :: Alex [Token] | |
go = do | |
token <- token | |
case token of | |
Just token -> (token :) <$> go | |
Nothing -> return [] | |
main :: IO () | |
main = do | |
putStrLn "Minimal" | |
str <- B.getContents | |
print $ scanner str | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment