Created
January 18, 2022 03:42
-
-
Save mkohlhaas/f5b0d90e54650047f9a7aea686fb8582 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Ch19Parser where | |
import Prelude | |
import Data.Either (Either(..)) | |
import Data.Maybe (Maybe(..)) | |
import Data.Tuple (Tuple(..)) | |
import Data.Array as A | |
import Data.Unfoldable as U | |
import Data.Traversable (class Traversable, sequence) | |
import Data.Generic.Rep (class Generic) | |
import Data.Show.Generic (genericShow) | |
import Control.Alt (class Alt, (<|>)) | |
import Data.String.CodePoints (codePointFromChar) | |
import Data.CodePoint.Unicode (isDecDigit, isAlpha) | |
import Data.String.CodeUnits (uncons, fromCharArray) | |
import Effect (Effect) | |
import Effect.Console (log) | |
-- The Parsing State is going to need to be passed from Parser to Parser, i.e. when the current Parser is done, | |
-- it passes what’s left of the String to the next Parser who takes a stab at parsing what’s left. Also, if a | |
-- single Parser in the chain were to fail, we want to short-circuit the parsing and return the error, hopefully | |
-- with some useful information as to what went wrong. | |
------------------------------ | |
-- Data Types and Type Classes | |
------------------------------ | |
-- e = error type, a = return type | |
class ParserError (e :: Type) where | |
eof :: e | |
invalidChar :: String -> e | |
data PError = EOF | InvalidChar String -- application specific parse error type | |
type ParserState a = Tuple String a -- left-over string and parsed value | |
type ParseFunction e a = ParserError e => String -> Either e (ParserState a) | |
newtype Parser e a = Parser (ParseFunction e a) | |
data Threeple a b c = Threeple a b c | |
-- Create a Functor instance for Parser (map over the parsed value) | |
instance functorParser :: Functor (Parser e) where | |
map f g = Parser \s -> map f <$> parse g s | |
-- 2. Rewrite Apply instance in do notation | |
instance applyParser :: Apply (Parser e) where | |
apply f g = Parser \s -> do | |
Tuple s1 h <- parse f s | |
Tuple s2 a <- parse g s1 | |
pure $ Tuple s2 $ h a | |
-- or simply: | |
-- apply = ap | |
-- apply f g = Parser \s -> case parse f s of | |
-- Left e -> Left e | |
-- Right (Tuple s1 h) -> case parse g s1 of | |
-- Left e -> Left e | |
-- Right (Tuple s2 a) -> Right $ Tuple s2 $ h a | |
-- Create an Applicative instance for Parser (this is our Applicative Parser) | |
instance applicativeParser :: Applicative (Parser e) where | |
pure a = Parser \s -> Right $ Tuple s a | |
-- 1. Create a Bind instance for Parser | |
instance bindParser :: Bind (Parser e) where | |
bind p f = Parser \s -> do | |
Tuple s1 x <- parse p s | |
parse (f x) s1 | |
-- the same: | |
-- bind p f = Parser \s -> parse p s >>= \(Tuple s1 x) -> parse (f x) s1 | |
-- the same: | |
-- bind p f = Parser \s -> case parse p s of | |
-- Left x -> Left x | |
-- Right (Tuple s1 a) -> parse (f a) s1 | |
-- Create a Monad instance for Parser | |
instance monadParser :: Monad (Parser e) | |
-- Create Alt instance for Parser | |
instance altParser :: Alt (Parser e) where | |
alt p1 p2 = Parser \s -> case parse p1 s of | |
Right x -> Right x | |
Left _ -> parse p2 s | |
------------------- | |
-- Using the Parser | |
------------------- | |
-- Write a parse function | |
parse :: ∀ e a. Parser e a -> ParseFunction e a | |
parse (Parser f) = f | |
-- Use parse in map and apply | |
-- Create Show instance for PError | |
derive instance genericPError :: Generic PError _ | |
instance showPError :: Show PError where | |
show = genericShow | |
-- Create ParserError instance for PError | |
instance parserErrorPError :: ParserError PError where | |
eof = EOF | |
invalidChar s = InvalidChar s | |
-- Write a char parser using String libary function uncons | |
char :: ∀ e. Parser e Char | |
char = Parser \s -> case uncons s of | |
Nothing -> Left eof | |
Just {head, tail } -> Right $ Tuple tail head | |
-- Rewrite the applicative parsers using do notation, append new function name with B for bind: | |
-- Write a two-char parser | |
twoCharsA :: ∀ e. Parser e (Tuple Char Char) | |
twoCharsA = Tuple <$> char <*> char | |
twoCharsB :: ∀ e. Parser e (Tuple Char Char) | |
twoCharsB = do | |
a <- char | |
b <- char | |
pure $ Tuple a b | |
-- Write a char-two-char parser | |
threeCharsA :: ∀ e. Parser e (Tuple Char (Tuple Char Char)) | |
threeCharsA = Tuple <$> char <*> twoCharsA | |
threeCharsB :: ∀ e. Parser e (Tuple Char (Tuple Char Char)) | |
threeCharsB = do | |
a <- char | |
b <- twoCharsB | |
pure $ Tuple a b | |
-- Write a Show instance for Threeple | |
derive instance genericThreeple :: Generic (Threeple a b c) _ | |
instance showThreeple :: (Show a, Show b, Show c) => Show (Threeple a b c) where | |
show = genericShow | |
-- Write a Threeple 3-char parser | |
threeCharsA' :: ∀ e. Parser e (Threeple Char Char Char) | |
threeCharsA' = Threeple <$> char <*> char <*> char | |
threeCharsB' :: ∀ e. Parser e (Threeple Char Char Char) | |
threeCharsB' = do | |
c1 <- char | |
c2 <- char | |
c3 <- char | |
pure $ Threeple c1 c2 c3 | |
-- Write a 3-char parser returning a String using library function fromCharArray | |
threeCharsA'' :: ∀ e. Parser e String | |
threeCharsA'' = (\c1 c2 c3 -> fromCharArray [c1, c2, c3]) <$> char <*> char <*> char | |
threeCharsB'' :: ∀ e. Parser e String | |
threeCharsB'' = do | |
c1 <- char | |
c2 <- char | |
c3 <- char | |
pure $ fromCharArray [c1, c2, c3] | |
-- write a parse' function which includes the error type in its signature | |
parse' :: ∀ a. Parser PError a -> ParseFunction PError a | |
parse' = parse | |
-- Write a 10-char parser in the same manner as the 3-char parser | |
tenCharsA :: ∀ e. Parser e String | |
tenCharsA = (\c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 -> fromCharArray [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]) | |
<$> char <*> char <*> char <*> char <*> char <*> char <*> char <*> char <*> char <*> char | |
tenCharsB :: ∀ e. Parser e String | |
tenCharsB = do | |
c1 <- char | |
c2 <- char | |
c3 <- char | |
c4 <- char | |
c5 <- char | |
c6 <- char | |
c7 <- char | |
c8 <- char | |
c9 <- char | |
c10 <- char | |
pure $ fromCharArray [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] | |
-- Do the same using sequence from Data.Traversable and replicate from Data.Array using this helper function | |
count :: ∀ e a. Int -> Parser e a -> Parser e (Array a) | |
count n p | n < 0 = pure [] | |
| otherwise = sequence (A.replicate n p) | |
-- Make count more generic and call it count' | |
count' :: ∀ e a f. Traversable f => U.Unfoldable f => Int -> Parser e a -> Parser e (f a) | |
count' n p | n < 0 = pure U.none | |
| otherwise = sequence (U.replicate n p) | |
count'' :: ∀ e. Int -> Parser e Char -> Parser e String | |
count'' n p = fromCharArray <$> count n p | |
satisfy :: ∀ e. ParserError e => String -> (Char -> Boolean) -> Parser e Char | |
satisfy expected pred = char >>= \c -> if pred c then pure c else fail $ invalidChar expected | |
fail :: ∀ e a. ParserError e => e -> Parser e a | |
fail e = Parser $ const $ Left e | |
digit :: ∀ e. ParserError e => Parser e Char | |
digit = satisfy "digit" $ isDecDigit <<< codePointFromChar | |
letter :: ∀ e. ParserError e => Parser e Char | |
letter = satisfy "letter" $ isAlpha <<< codePointFromChar | |
alphaNum :: ∀ e. ParserError e => Parser e Char | |
alphaNum = letter <|> digit <|> fail (invalidChar "alphaNum") | |
test :: Effect Unit | |
test = do | |
log "Ch. 19 Monadic Parser." | |
log "-------------------------" | |
log "-- Applicative Parsers --" | |
log "-------------------------" | |
log $ show $ (parse char "ABC" :: Either PError _) -- (Right (Tuple "BC" 'A')). | |
log $ show $ (parse twoCharsA "ABC" :: Either PError _) -- (Right (Tuple "C" (Tuple 'A' 'B'))). | |
log $ show $ (parse threeCharsA "ABC" :: Either PError _) -- (Right (Tuple "" (Tuple 'A' (Tuple 'B' 'C')))) | |
log $ show $ (parse threeCharsA' "ABC" :: Either PError _) -- (Right (Tuple "" (Threeple 'A' 'B' 'C'))) | |
log $ show $ (parse threeCharsA'' "ABC" :: Either PError _) -- (Right (Tuple "" "ABC")) | |
log $ show $ parse' char "ABC" -- (Right (Tuple "BC" 'A')). | |
log $ show $ parse' twoCharsA "ABC" -- (Right (Tuple "C" (Tuple 'A' 'B'))). | |
log $ show $ parse' threeCharsA "ABC" -- (Right (Tuple "" (Tuple 'A' (Tuple 'B' 'C')))) | |
log $ show $ parse' threeCharsA' "ABC" -- (Right (Tuple "" (Threeple 'A' 'B' 'C'))) | |
log $ show $ parse' threeCharsA'' "ABC" -- (Right (Tuple "" "ABC")) | |
log $ show $ parse' threeCharsA "A" -- (Left EOF) | |
log $ show $ parse' tenCharsA "ABCDEFGHIJKLMNOPQRSTUVXYZ" -- (Right (Tuple "KLMNOPQRSTUVXYZ" "ABCDEFGHIJ")) | |
log $ show $ parse' (fromCharArray <$> (count 10 char)) "ABCDEFGHIJKLMNOPQRSTUVXYZ" -- (Right (Tuple "KLMNOPQRSTUVXYZ" "ABCDEFGHIJ")) | |
log $ show $ parse' (fromCharArray <$> (count' 10 char)) "ABCDEFGHIJKLMNOPQRSTUVXYZ" -- (Right (Tuple "KLMNOPQRSTUVXYZ" "ABCDEFGHIJ")) | |
log "---------------------" | |
log "-- Monadic Parsers --" | |
log "---------------------" | |
log $ show $ (parse char "ABC" :: Either PError _) -- (Right (Tuple "BC" 'A')). | |
log $ show $ (parse twoCharsB "ABC" :: Either PError _) -- (Right (Tuple "C" (Tuple 'A' 'B'))). | |
log $ show $ (parse threeCharsB "ABC" :: Either PError _) -- (Right (Tuple "" (Tuple 'A' (Tuple 'B' 'C')))) | |
log $ show $ (parse threeCharsB' "ABC" :: Either PError _) -- (Right (Tuple "" (Threeple 'A' 'B' 'C'))) | |
log $ show $ (parse threeCharsB'' "ABC" :: Either PError _) -- (Right (Tuple "" "ABC")) | |
log $ show $ parse' char "ABC" -- (Right (Tuple "BC" 'A')). | |
log $ show $ parse' twoCharsB "ABC" -- (Right (Tuple "C" (Tuple 'A' 'B'))). | |
log $ show $ parse' threeCharsB "ABC" -- (Right (Tuple "" (Tuple 'A' (Tuple 'B' 'C')))) | |
log $ show $ parse' threeCharsB' "ABC" -- (Right (Tuple "" (Threeple 'A' 'B' 'C'))) | |
log $ show $ parse' threeCharsB'' "ABC" -- (Right (Tuple "" "ABC")) | |
log $ show $ parse' threeCharsB "A" -- (Left EOF) | |
log $ show $ parse' tenCharsB "ABCDEFGHIJKLMNOPQRSTUVXYZ" -- (Right (Tuple "KLMNOPQRSTUVXYZ" "ABCDEFGHIJ")) | |
log $ show $ parse' (count'' 3 digit) "123456" -- (Right (Tuple "456" "123")) | |
log $ show $ parse' (count'' 3 digit) "abc456" -- (Left (InvalidChar "digit")) | |
log $ show $ parse' (count'' 4 letter) "Freddy" -- (Right (Tuple "dy" "Fred")) | |
log $ show $ parse' (count'' 10 alphaNum) "a1b2c3d4e5" -- (Right (Tuple "" "a1b2c3d4e5")) | |
log $ show $ parse' (count'' 10 alphaNum) "######" -- (Left (InvalidChar "alphaNum")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ name = "my-project" | |
, dependencies = [ "console", "effect", "prelude", "psci-support", "either", "tuples", "maybe", "strings", "arrays", "foldable-traversable", "unfoldable", "unicode", "control" ] | |
, packages = ./packages.dhall | |
, sources = [ "src/**/*.purs", "test/**/*.purs" ] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment