Skip to content

Instantly share code, notes, and snippets.

@barrucadu
Created May 27, 2013 17:44
Show Gist options
  • Save barrucadu/5658260 to your computer and use it in GitHub Desktop.
Save barrucadu/5658260 to your computer and use it in GitHub Desktop.
A gentle introduction to parsec (in particular, applicative parsec), by constructing a parser for CSV files http://blog.barrucadu.co.uk/2013/05/27/a-gentle-introduction-to-parsec/
-- Example code for a CSV file parser using Parsec. Some parts are
-- repeated, with differing functionality. To avoid name conflicts,
-- comment out the versions you don't want to play with.
module CSVParser (parseCSV, parseCSV') where
import Control.Applicative ((<$), (<*), (*>), liftA)
import Data.Char (chr)
import Data.Either (either)
import Text.Parsec
---------- Simple cell parser
cell :: Parsec String () String
cell = many $ noneOf ",\n"
----------
---------- More advanced cell parser
cell :: Parsec String () String
cell = cell' <|> many (noneOf ",\n")
where cell' = between (char '"') (char '"') $ many chr
chr = noneOf "\"" <|> try ('"' <$ string "\"\"")
----------
---------- Final cell parser
cell :: Parsec String () String
cell = cell' <|> many (chr ",\n\\")
where cell' = between (char '"') (char '"') $ many (chr "\\\"")
chr bad = try literal <|> specialchar <|> noneOf bad
specialCharacters :: [(Char, Char)]
specialCharacters = [('0', '\0'), ('a', '\a'), ('b', '\b'), ('f', '\f'),
('n', '\n'), ('r', '\r'), ('t', '\t'), ('v', '\v'),
('"', '"'), ('\'', '\''), ('\\', '\\')]
specialchar :: Parsec String () Char
specialchar = char '\\' *> special' specialCharacters
where special' ((esc, c):cs) = char esc *> parserReturn c <|> special' cs
special' [] = parserZero
literalNumbers :: [(Char, Parsec String () Char)]
literalNumbers = [('x', hexDigit), ('o', octDigit)]
literal :: Parsec String () Char
literal = char '\\' *> literal' literalNumbers
where literal' ((c, f):cs) = char c *> tochar c (many1 f) <|> literal' cs
literal' [] = parserZero
tochar c = liftA $ \s -> chr . read $ '0' : c : s
----------
---------- Line parser
line :: Parsec String () [String]
line = cell `sepBy1` char ','
----------
---------- File parser
csvp :: Parsec String () [[String]]
csvp = line `endBy` newline <* eof
----------
---------- Parser interfaces
parseCSV :: String -> Either ParseError [[String]]
parseCSV = parse csvp ""
parseCSV' :: String -> Maybe [[String]]
parseCSV' = either (const Nothing) Just . parseCSV
----------
---------- Exercise 1: allow for multi-character special character escapes.
specialCharacters :: [(String, Char)]
specialCharacters = [("0", '\0'), ("a", '\a'), ("b", '\b'), ("f", '\f'),
("n", '\n'), ("r", '\r'), ("t", '\t'), ("v", '\v'),
("\"", '"'), ("'", '\''), ("\\", '\\'),
("SOH", '\SOH'), ("STX", '\STX'), ("ETX", '\ETX'),
("EOT", '\EOT'), ("ENQ", '\ENQ'), ("ACK", '\ACK'),
("BEL", '\BEL'), ("BS", '\BS'), ("HT", '\HT'),
("LF", '\LF'), ("VT", '\VT'), ("FF", '\FF'),
("CR", '\CR'), ("SO", '\SO'), ("SI", '\SI'),
("DLE", '\DLE'), ("DC1", '\DC1'), ("DC2", '\DC2'),
("DC3", '\DC3'), ("DC4", '\DC4'), ("NAK", '\NAK'),
("SYN", '\SYN'), ("ETB", '\ETB'), ("CAN", '\CAN'),
("EM", '\EM'), ("SUB", '\SUB'), ("ESC", '\ESC'),
("FS", '\FS'), ("GS", '\GS'), ("RS", '\RS'),
("US", '\US'), ("SP", '\SP'), ("DEL", '\DEL')]
specialchar :: Parsec String () Char
specialchar = char '\\' *> foldr ((<|>) . special') parserZero specialCharacters
where special' (esc, c) = try (string esc) *> parserReturn c
----------
---------- Exercise 2: allow for decimal character literals
literalNumbers :: [(Maybe Char, Parsec String () Char)]
literalNumbers = [(Just 'x', hexDigit), (Just 'o', octDigit), (Nothing, digit)]
literal :: Parsec String () Char
literal = char '\\' *> try (foldr ((<|>) . literal') parserZero literalNumbers)
where literal' (c'@(Just c), f) = char c *> tochar c' (many1 f)
literal' (Nothing, f) = tochar Nothing (many1 f)
tochar (Just c) = liftA $ \s -> chr . read $ '0' : c : s
tochar Nothing = liftA $ \s -> chr $ read s
----------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment