Skip to content

Instantly share code, notes, and snippets.

@ifukazoo
Last active August 29, 2015 14:07
Show Gist options
  • Save ifukazoo/6635e77b87ed0c2cc972 to your computer and use it in GitHub Desktop.
Save ifukazoo/6635e77b87ed0c2cc972 to your computer and use it in GitHub Desktop.
CSVファイルパース
import System.Environment(getArgs)
import System.Exit
import Control.Monad(when)
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.String
-- | Parses a single field separator and returns it.
--
-- RFC 4180 grammar: @COMMA = %x2C@
comma :: Parser Char
comma = char '\x2C'
-- | Parses a single double-quote character and returns it.
--
-- RFC 4180 grammar: @DQUOTE = %x22@ (as per section 6.1 of RFC 2234).
dquote :: Parser Char
dquote = char '\x22'
-- | Parses one line terminator and returns the text that was matched.
--
-- RFC 4180 grammar (as per section 6.1 of RFC 2234):
--
-- > CR   = %x0D
-- > LF   = %x0A
-- > CRLF = CR LF
--
-- Besides CRLF, a lone LF or a lone CR is also accepted as a terminator.
eol :: Parser String
eol = choice terminators <?> "end of line"
  where
    -- CRLF is tried first and wrapped in 'try' so that a CR which is not
    -- followed by LF is handed back to the lone-CR alternative.
    terminators =
      [ try (string "\r\n")
      , string "\n"
      , string "\r"
      ]
-- | Parses one character that may appear in an unquoted field.
--
-- RFC 4180 grammar: @TEXTDATA = %x20-21 / %x23-2B / %x2D-7E@
--
-- This parser is more permissive than the grammar above: it rejects only the
-- control characters @%x00-1F@, the double quote and the comma, so @%x7F@ and
-- every character above it are accepted as well.
textdata :: Parser Char
textdata = noneOf (controlChars ++ "\",")
  where
    controlChars = ['\x00' .. '\x1f']
-- | Parses an unquoted field: zero or more 'textdata' characters.
--
-- RFC 4180 grammar: @non-escaped = *TEXTDATA@
--
-- Because zero characters are allowed, this parser also succeeds with @""@
-- without consuming any input.
nonEscaped :: Parser String
nonEscaped = many textdata
-- | Parses a double-quoted field and returns its contents without the
-- surrounding quotes.
--
-- RFC 4180 grammar:
-- @escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE@
--
-- A doubled quote (@""@) inside the field yields one literal @"@.
escaped :: Parser String
escaped = between dquote dquote (many quotedChar)
  where
    -- Any single character allowed between the enclosing quotes.
    quotedChar =
      textdata
        <|> comma
        <|> char '\n'
        <|> char '\r'
        <|> try escapedQuote
    -- 'try' is required here: when the quote turns out to be the closing one,
    -- it must be un-consumed so that 'many' stops without an error.
    escapedQuote = do
      _ <- dquote
      _ <- dquote
      return '"'
-- | Parses one field, either quoted or unquoted.
--
-- RFC 4180 grammar: @field = (escaped / non-escaped)@
--
-- The quoted form is attempted first under 'try'; if it fails, the input is
-- rewound and the unquoted form (which may match the empty string) is used.
field :: Parser String
field = quoted <|> unquoted
  where
    quoted = try escaped
    unquoted = nonEscaped
-- | Parses one record: one or more fields separated by commas.
--
-- RFC 4180 grammar: @record = field *(COMMA field)@
--
-- The result always contains at least one field; an empty line produces the
-- single-field record @[""]@.
record :: Parser [String]
record = field `sepBy1` comma
-- | Parses a whole CSV document into a list of records.
--
-- RFC 4180 grammar: @file = [header CRLF] record *(CRLF record) [CRLF]@
--
-- No distinction is made between a header and a data record: a header line,
-- if present, is simply the first element of the result.
--
-- A 'record' can match the empty string, so a line terminator at the very end
-- of the input must not be treated as a record separator; doing so would
-- append a spurious @[""]@ record to every file that ends with a newline.
-- The separator is therefore accepted only when more input follows it, and
-- the final terminator is left to the optional trailing 'eol'.
--
-- Completely empty input still yields the single record @[""]@.
csvFile :: Parser [[String]]
csvFile = do
  first <- record
  rest <- many (try recordSeparator >> record)
  optional eol
  eof
  return (first : rest)
  where
    -- A line terminator that is followed by more input. Wrapped in 'try' at
    -- the call site so that the terminator is un-consumed when it is the last
    -- thing in the file.
    recordSeparator = eol >> notFollowedBy eof
-- | Runs 'csvFile' over the given text.
--
-- Returns the parsed records on success, or Parsec's 'ParseError' on failure.
-- The source name reported in error messages is always @(unknown)@.
parseCSV :: String -> Either ParseError [[String]]
parseCSV = parse csvFile "(unknown)"
-- | Entry point: parses the CSV file named by the single command-line
-- argument and prints one bracketed, comma-joined line per record.
--
-- Exits with a failure status when the number of arguments is not exactly one
-- or when the file cannot be parsed.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [path] -> do
      contents <- readFile path
      case parseCSV contents of
        Left e -> do
          putStrLn "Error parsing input:"
          print e
          -- Report the failure to the caller instead of exiting with success.
          exitFailure
        Right records -> mapM_ printRecord records
    _ -> do
      putStrLn "usage: <command> <csv filename>"
      exitFailure
  where
    -- 'print' goes through 'show', which escapes non-ASCII characters, so
    -- the fields are written verbatim with 'putStr' instead.
    printRecord [] = putStrLn "[]"
    printRecord (f : fs) = do
      putStr "["
      putStr f
      mapM_ (\s -> putStr (',' : s)) fs
      putStrLn "]"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment