Skip to content

Instantly share code, notes, and snippets.

@pedrominicz
Last active July 27, 2022 16:23
Show Gist options
  • Save pedrominicz/aaf19e6fb4d46be97af73f5cf3605009 to your computer and use it in GitHub Desktop.
Save pedrominicz/aaf19e6fb4d46be97af73f5cf3605009 to your computer and use it in GitHub Desktop.
Alex: strict bytestring with user state
{
module Lazy where
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as B
}
%wrapper "monadUserState-bytestring"
$alpha = [A-Za-z]
$digit = [0-9]
@string = [$alpha _] [$alpha $digit _]*
tokens :-
<0> $white+ ;
<0> @string { string }
<0> "{-" { start }
<comment> "{-" { start }
<comment> "-}" { end }
<comment> [. \n] ;
{
type AlexUserState = Int
alexInitUserState :: AlexUserState
alexInitUserState = 0
increaseDepth :: Alex ()
increaseDepth = do
alexSetStartCode comment
Alex $ \s -> Right (s { alex_ust = alex_ust s + 1 }, ())
decreaseDepth :: Alex Int
decreaseDepth =
Alex $ \s -> Right (s { alex_ust = alex_ust s - 1 }, alex_ust s - 1)
alexEOF :: Alex (Maybe ByteString)
alexEOF = return Nothing
type Token = ByteString
start :: AlexAction (Maybe Token)
start input len = increaseDepth >> skip input len
end :: AlexAction (Maybe Token)
end input len = do
depth <- decreaseDepth
if depth == 0
then alexSetStartCode 0
else return ()
skip input len
string :: AlexAction (Maybe Token)
string (_, _, str, _) len = do
-- Copy the `ByteString`, otherwise, it will prevent the input from being
-- garbage collected. See section 5.3.6 of the Alex User Guide.
return . Just . B.toStrict $ B.take len str
scanner :: ByteString -> Either String [Token]
scanner str = runAlex (B.fromStrict str) go
where
go :: Alex [Token]
go = do
token <- alexMonadScan
case token of
Just token -> (token :) <$> go
Nothing -> return []
main :: IO ()
main = do
putStrLn "Lazy"
str <- B.toStrict <$> B.getContents
print $ scanner str
-- Hack to silence unused import warning.
hack :: a
hack = undefined Data.Char.isAlpha
}
{
module Minimal where
import Control.Monad.Except
import Control.Monad.State.Strict
import Data.ByteString (ByteString)
import Data.Word (Word8)
import qualified Data.ByteString as B
}
$alpha = [A-Za-z]
$digit = [0-9]
@string = [$alpha _] [$alpha $digit _]*
tokens :-
<0> $white+ ;
<0> @string { string }
<0> "{-" { start }
<comment> "{-" { start }
<comment> "-}" { end }
<comment> [. \n] ;
{
type Byte = Word8
type AlexInput = (Byte, ByteString, Int)
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (_, str, n) =
case B.uncons str of
Just (c, str) -> Just (c, (c, str, n + 1))
Nothing -> Nothing
data AlexState = AlexState
{ input :: !AlexInput
, startcode :: !Int
, depth :: !Int
}
type Alex a = StateT AlexState (Except String) a
runAlex :: ByteString -> Alex a -> Either String a
runAlex str alex = runExcept $ evalStateT alex (AlexState (10, str, 0) 0 0)
type Token = ByteString
token :: Alex (Maybe Token)
token = do
input <- input <$> get
startcode <- startcode <$> get
case alexScan input startcode of
AlexEOF -> return Nothing
AlexError _ -> throwError "error"
AlexSkip input _ -> do
modify $ \s -> s { input = input }
token
AlexToken input' len action -> do
modify $ \s -> s { input = input' }
action input len
start :: AlexInput -> Int -> Alex (Maybe Token)
start _ _ = do
modify $ \s -> s { startcode = comment, depth = depth s + 1 }
token
end :: AlexInput -> Int -> Alex (Maybe Token)
end _ _ = do
modify $ \s -> s { depth = depth s - 1 }
depth <- depth <$> get
when (depth == 0) . modify $ \s -> s { startcode = 0 }
token
string :: AlexInput -> Int -> Alex (Maybe Token)
string (_, str, _) len = do
-- Copy the `ByteString`, otherwise, it will prevent the input from being
-- garbage collected. See section 5.3.6 of the Alex User Guide.
return . Just . B.copy $ B.take len str
scanner :: ByteString -> Either String [Token]
scanner str = runAlex str go
where
go :: Alex [Token]
go = do
token <- token
case token of
Just token -> (token :) <$> go
Nothing -> return []
main :: IO ()
main = do
putStrLn "Minimal"
str <- B.getContents
print $ scanner str
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment