Skip to content

Instantly share code, notes, and snippets.

@KiJeong-Lim
Last active December 22, 2021 04:00
Show Gist options
  • Save KiJeong-Lim/84320de928374d8d7de757b20c7504c8 to your computer and use it in GitHub Desktop.
Save KiJeong-Lim/84320de928374d8d7de757b20c7504c8 to your computer and use it in GitHub Desktop.
input/output-files of my lexer generator
\hshead {
module Aladdin.Front.Analyzer.Lexer where
import Aladdin.Front.Analyzer.Grammar
import Aladdin.Front.Header
}
\target {
token-type: "Token"
lexer-name: "runAladdinLexer"
}
\define $small_letter = 'a'-'z'
\define $big_letter = 'A'-'Z'
\define $digit = '0'-'9'
\define $Char = [. \ '\n' \ '\\' \ '\"'] + "\\n" + "\\\\" + "\\\"" + "\\\'" + "\\t"
\define $Word = [$big_letter $small_letter] [$small_letter $digit '_' $big_letter]* + "_"
\xmatch $Word:
T_Word (SLoc (row1, col1) (row2, col2)) this
\xmatch "\"" $Char* "\"":
T_Str (SLoc (row1, col1) (row2, col2)) this
\xmatch [' ' '\n']+: skip
\hstail {
}
module Aladdin.Front.Analyzer.Lexer where
import Aladdin.Front.Analyzer.Grammar
import Aladdin.Front.Header
import qualified Control.Monad.Trans.State.Strict as XState
import qualified Data.Functor.Identity as XIdentity
import qualified Data.Map.Strict as XMap
import qualified Data.Set as XSet
-- the following codes are generated by LGS.
data DFA
= DFA
{ getInitialQOfDFA :: Int
, getFinalQsOfDFA :: XMap.Map Int Int
, getTransitionsOfDFA :: XMap.Map (Int, Char) Int
, getMarkedQsOfDFA :: XMap.Map Int (Bool, XSet.Set Int)
, getPseudoFinalsOfDFA :: XSet.Set Int
}
deriving ()
runAladdinLexer :: String -> Either (Int, Int) [Token]
runAladdinLexer = runAladdinLexer_this . addLoc 1 1 where
-- 1: "\"" ([. \ '\\' \ '\"' \ '\n'] + "\\" ['\"' '\'' '\\' 'n' 't'])* "\\"
-- 2: "\"" ([. \ '\\' \ '\"' \ '\n'] + "\\" ['\"' '\'' '\\' 'n' 't'])*
-- 3: ""
-- 4: "_"
-- 5: ['A'-'Z' 'a'-'z'] ['0'-'9' 'A'-'Z' '_' 'a'-'z']*
-- 6: "\"" ([. \ '\\' \ '\"' \ '\n'] + "\\" ['\"' '\'' '\\' 'n' 't'])* "\""
-- 7: ['\n' ' ']+
theDFA :: DFA
theDFA = DFA
{ getInitialQOfDFA = 3
, getFinalQsOfDFA = XMap.fromAscList [(4, 1), (5, 1), (6, 2), (7, 3)]
, getTransitionsOfDFA = XMap.fromAscList
[ ((1, '"'), 2), ((1, '\''), 2), ((1, '\\'), 2), ((1, 'n'), 2), ((1, 't'), 2)
, ((2, ' '), 2), ((2, '!'), 2), ((2, '"'), 6), ((2, '#'), 2), ((2, '$'), 2), ((2, '%'), 2), ((2, '&'), 2), ((2, '\''), 2), ((2, '('), 2), ((2, ')'), 2), ((2, '*'), 2), ((2, '+'), 2), ((2, ','), 2), ((2, '-'), 2), ((2, '.'), 2), ((2, '/'), 2), ((2, '0'), 2), ((2, '1'), 2), ((2, '2'), 2), ((2, '3'), 2), ((2, '4'), 2), ((2, '5'), 2), ((2, '6'), 2), ((2, '7'), 2), ((2, '8'), 2), ((2, '9'), 2), ((2, ':'), 2), ((2, ';'), 2), ((2, '<'), 2), ((2, '='), 2), ((2, '>'), 2), ((2, '?'), 2), ((2, '@'), 2), ((2, 'A'), 2), ((2, 'B'), 2), ((2, 'C'), 2), ((2, 'D'), 2), ((2, 'E'), 2), ((2, 'F'), 2), ((2, 'G'), 2), ((2, 'H'), 2), ((2, 'I'), 2), ((2, 'J'), 2), ((2, 'K'), 2), ((2, 'L'), 2), ((2, 'M'), 2), ((2, 'N'), 2), ((2, 'O'), 2), ((2, 'P'), 2), ((2, 'Q'), 2), ((2, 'R'), 2), ((2, 'S'), 2), ((2, 'T'), 2), ((2, 'U'), 2), ((2, 'V'), 2), ((2, 'W'), 2), ((2, 'X'), 2), ((2, 'Y'), 2), ((2, 'Z'), 2), ((2, '['), 2), ((2, '\\'), 1), ((2, ']'), 2), ((2, '^'), 2), ((2, '_'), 2), ((2, '`'), 2), ((2, 'a'), 2), ((2, 'b'), 2), ((2, 'c'), 2), ((2, 'd'), 2), ((2, 'e'), 2), ((2, 'f'), 2), ((2, 'g'), 2), ((2, 'h'), 2), ((2, 'i'), 2), ((2, 'j'), 2), ((2, 'k'), 2), ((2, 'l'), 2), ((2, 'm'), 2), ((2, 'n'), 2), ((2, 'o'), 2), ((2, 'p'), 2), ((2, 'q'), 2), ((2, 'r'), 2), ((2, 's'), 2), ((2, 't'), 2), ((2, 'u'), 2), ((2, 'v'), 2), ((2, 'w'), 2), ((2, 'x'), 2), ((2, 'y'), 2), ((2, 'z'), 2), ((2, '{'), 2), ((2, '|'), 2), ((2, '}'), 2), ((2, '~'), 2)
, ((3, '\n'), 7), ((3, ' '), 7), ((3, '"'), 2), ((3, 'A'), 5), ((3, 'B'), 5), ((3, 'C'), 5), ((3, 'D'), 5), ((3, 'E'), 5), ((3, 'F'), 5), ((3, 'G'), 5), ((3, 'H'), 5), ((3, 'I'), 5), ((3, 'J'), 5), ((3, 'K'), 5), ((3, 'L'), 5), ((3, 'M'), 5), ((3, 'N'), 5), ((3, 'O'), 5), ((3, 'P'), 5), ((3, 'Q'), 5), ((3, 'R'), 5), ((3, 'S'), 5), ((3, 'T'), 5), ((3, 'U'), 5), ((3, 'V'), 5), ((3, 'W'), 5), ((3, 'X'), 5), ((3, 'Y'), 5), ((3, 'Z'), 5), ((3, '_'), 4), ((3, 'a'), 5), ((3, 'b'), 5), ((3, 'c'), 5), ((3, 'd'), 5), ((3, 'e'), 5), ((3, 'f'), 5), ((3, 'g'), 5), ((3, 'h'), 5), ((3, 'i'), 5), ((3, 'j'), 5), ((3, 'k'), 5), ((3, 'l'), 5), ((3, 'm'), 5), ((3, 'n'), 5), ((3, 'o'), 5), ((3, 'p'), 5), ((3, 'q'), 5), ((3, 'r'), 5), ((3, 's'), 5), ((3, 't'), 5), ((3, 'u'), 5), ((3, 'v'), 5), ((3, 'w'), 5), ((3, 'x'), 5), ((3, 'y'), 5), ((3, 'z'), 5)
, ((5, '0'), 5), ((5, '1'), 5), ((5, '2'), 5), ((5, '3'), 5), ((5, '4'), 5), ((5, '5'), 5), ((5, '6'), 5), ((5, '7'), 5), ((5, '8'), 5), ((5, '9'), 5), ((5, 'A'), 5), ((5, 'B'), 5), ((5, 'C'), 5), ((5, 'D'), 5), ((5, 'E'), 5), ((5, 'F'), 5), ((5, 'G'), 5), ((5, 'H'), 5), ((5, 'I'), 5), ((5, 'J'), 5), ((5, 'K'), 5), ((5, 'L'), 5), ((5, 'M'), 5), ((5, 'N'), 5), ((5, 'O'), 5), ((5, 'P'), 5), ((5, 'Q'), 5), ((5, 'R'), 5), ((5, 'S'), 5), ((5, 'T'), 5), ((5, 'U'), 5), ((5, 'V'), 5), ((5, 'W'), 5), ((5, 'X'), 5), ((5, 'Y'), 5), ((5, 'Z'), 5), ((5, '_'), 5), ((5, 'a'), 5), ((5, 'b'), 5), ((5, 'c'), 5), ((5, 'd'), 5), ((5, 'e'), 5), ((5, 'f'), 5), ((5, 'g'), 5), ((5, 'h'), 5), ((5, 'i'), 5), ((5, 'j'), 5), ((5, 'k'), 5), ((5, 'l'), 5), ((5, 'm'), 5), ((5, 'n'), 5), ((5, 'o'), 5), ((5, 'p'), 5), ((5, 'q'), 5), ((5, 'r'), 5), ((5, 's'), 5), ((5, 't'), 5), ((5, 'u'), 5), ((5, 'v'), 5), ((5, 'w'), 5), ((5, 'x'), 5), ((5, 'y'), 5), ((5, 'z'), 5)
, ((7, '\n'), 7), ((7, ' '), 7)
]
, getMarkedQsOfDFA = XMap.fromAscList []
, getPseudoFinalsOfDFA = XSet.fromAscList []
}
runDFA :: DFA -> [((Int, Int), Char)] -> Either (Int, Int) ((Maybe Int, [((Int, Int), Char)]), [((Int, Int), Char)])
runDFA (DFA q0 qfs deltas markeds pseudo_finals) = if XSet.null pseudo_finals then Right . XIdentity.runIdentity . runFast else runSlow where
loop1 :: Int -> [((Int, Int), Char)] -> [((Int, Int), Char)] -> XState.StateT (Maybe Int, [((Int, Int), Char)]) XIdentity.Identity [((Int, Int), Char)]
loop1 q buffer [] = return buffer
loop1 q buffer (ch : str) = do
(latest, accepted) <- XState.get
case XMap.lookup (q, snd ch) deltas of
Nothing -> return (buffer ++ [ch] ++ str)
Just p -> case XMap.lookup p qfs of
Nothing -> loop1 p (buffer ++ [ch]) str
latest' -> do
XState.put (latest', accepted ++ buffer ++ [ch])
loop1 p [] str
loop2 :: XSet.Set Int -> Int -> [((Int, Int), Char)] -> [((Int, Int), Char)] -> XState.StateT [((Int, Int), Char)] XIdentity.Identity [((Int, Int), Char)]
loop2 qs q [] buffer = return buffer
loop2 qs q (ch : str) buffer = do
case XMap.lookup (q, snd ch) deltas of
Nothing -> return (buffer ++ [ch] ++ str)
Just p -> case p `XSet.member` qs of
False -> loop2 qs p str (buffer ++ [ch])
True -> do
accepted <- XState.get
XState.put (accepted ++ buffer ++ [ch])
loop2 qs p str []
loop3 :: XSet.Set Int -> Int -> [((Int, Int), Char)] -> [((Int, Int), Char)] -> XState.StateT [((Int, Int), Char)] XIdentity.Identity [((Int, Int), Char)]
loop3 qs q [] buffer = return buffer
loop3 qs q (ch : str) buffer = do
case XMap.lookup (q, snd ch) deltas of
Nothing -> return (buffer ++ [ch] ++ str)
Just p -> case p `XSet.member` qs of
False -> loop3 qs p str (buffer ++ [ch])
True -> do
accepted <- XState.get
XState.put (accepted ++ buffer ++ [ch])
return str
runFast :: [((Int, Int), Char)] -> XIdentity.Identity ((Maybe Int, [((Int, Int), Char)]), [((Int, Int), Char)])
runFast input = do
(rest, (latest, accepted)) <- XState.runStateT (loop1 q0 [] input) (Nothing, [])
case latest >>= flip XMap.lookup markeds of
Nothing -> return ((latest, accepted), rest)
Just (True, qs) -> do
(rest', accepted') <- XState.runStateT (loop2 qs q0 accepted []) []
return ((latest, accepted'), rest' ++ rest)
Just (False, qs) -> do
(rest', accepted') <- XState.runStateT (loop3 qs q0 accepted []) []
return ((latest, accepted'), rest' ++ rest)
runSlow :: [((Int, Int), Char)] -> Either (Int, Int) ((Maybe Int, [((Int, Int), Char)]), [((Int, Int), Char)])
runSlow = undefined
addLoc :: Int -> Int -> String -> [((Int, Int), Char)]
addLoc _ _ [] = []
addLoc row col (ch : chs) = if ch == '\n' then ((row, col), ch) : addLoc (row + 1) 1 chs else ((row, col), ch) : addLoc row (col + 1) chs
runAladdinLexer_this :: [((Int, Int), Char)] -> Either (Int, Int) [Token]
runAladdinLexer_this [] = return []
runAladdinLexer_this str0 = do
let return_one my_token = return [my_token]
dfa_output <- runDFA theDFA str0
(str1, piece) <- case dfa_output of
((_, []), _) -> Left (fst (head str0))
((Just label, accepted), rest) -> return (rest, ((label, map snd accepted), (fst (head accepted), fst (head (reverse accepted)))))
_ -> Left (fst (head str0))
tokens1 <- case piece of
((1, this), ((row1, col1), (row2, col2))) -> return_one (T_Word (SLoc (row1, col1) (row2, col2)) this)
((2, this), ((row1, col1), (row2, col2))) -> return_one (T_Str (SLoc (row1, col1) (row2, col2)) this)
((3, this), ((row1, col1), (row2, col2))) -> return []
tokens2 <- runAladdinLexer_this str1
return (tokens1 ++ tokens2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment