Last active
August 29, 2015 14:07
-
-
Save ifukazoo/6635e77b87ed0c2cc972 to your computer and use it in GitHub Desktop.
CSVファイルパース
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Control.Monad (when)
import System.Environment (getArgs)
import System.Exit
import System.IO (hPrint, hPutStrLn, stderr)
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.String
-- COMMA = %x2C
-- Parses exactly one field-separator comma and returns it.
comma :: Parser Char
comma = char '\x2C'
-- DQUOTE = %x22 ; as per section 6.1 of RFC 2234 [2]
-- Parses exactly one double-quote character and returns it.
dquote :: Parser Char
dquote = char '\x22'
-- CR   = %x0D   ; as per section 6.1 of RFC 2234 [2]
-- LF   = %x0A   ; as per section 6.1 of RFC 2234 [2]
-- CRLF = CR LF  ; as per section 6.1 of RFC 2234 [2]
--
-- Parses one line terminator and returns the matched text. Besides the
-- CRLF sequence, a lone LF or a lone CR is also accepted.
eol :: Parser String
eol =
    choice
        [ try (string "\r\n") -- 'try' so that a lone CR can still match below
        , string "\n"
        , string "\r"
        ]
        <?> "end of line"
-- TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
--
-- Parses one ordinary field character. Rejected are the control characters
-- %x00-1F, the double quote (%x22) and the comma (%x2C). Note that this is
-- wider than the grammar line above: every character above %x1F other than
-- those two, including characters beyond %x7E, is accepted.
textdata :: Parser Char
textdata = satisfy isTextChar
  where
    isTextChar c = c > '\x1f' && c /= '\x22' && c /= '\x2C'
-- non-escaped = *TEXTDATA
--
-- Parses an unquoted field: zero or more 'textdata' characters. It succeeds
-- with the empty string when the next character is not a 'textdata'
-- character, so an empty field is a valid result.
nonEscaped :: Parser String
nonEscaped = many textdata
-- escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
--
-- Parses a quoted field and returns its contents without the surrounding
-- quotes. Inside the quotes, commas and line breaks are kept literally and
-- a doubled quote ("") stands for a single quote character.
escaped :: Parser String
escaped = between dquote dquote (many quotedChar)
  where
    quotedChar =
        textdata <|> comma <|> char '\n' <|> char '\r' <|> try escapedQuote
    -- Two consecutive quotes yield one '"'. The 'try' at the use site lets
    -- a single quote be left unconsumed for the closing delimiter.
    escapedQuote = dquote >> dquote
-- field = (escaped / non-escaped)
--
-- Parses one field. The quoted form is attempted first; if it fails, the
-- input is rewound and the field is parsed as unquoted text.
field :: Parser String
field = choice [try escaped, nonEscaped]
-- record = field *(COMMA field)
--
-- Parses one record: one or more fields separated by commas. Because a
-- field may be empty, the result always contains at least one element.
record :: Parser [String]
record = field `sepBy1` comma
-- file = [header CRLF] record *(CRLF record) [CRLF]
--
-- Parses a whole CSV document into a list of records and requires that all
-- input is consumed.
--
-- A record may be empty, so a naive "*(CRLF record)" would also swallow the
-- optional final line break and report it as an extra empty record. To
-- avoid that, a line break only counts as a record separator when it is not
-- immediately followed by end of input; a final line break is consumed
-- separately and produces no record. Blank lines elsewhere in the input
-- still yield a record holding a single empty field.
csvFile :: Parser [[String]]
csvFile = do
    firstRecord <- record
    laterRecords <- many (separator >> record)
    optional eol
    eof
    return (firstRecord : laterRecords)
  where
    -- 'try' rewinds the line break when it turns out to be the final one.
    separator = try (eol >> notFollowedBy eof)
-- Runs 'csvFile' over the given text. Returns the parsed records on
-- success, or a 'ParseError' whose source name is "(unknown)".
parseCSV :: String -> Either ParseError [[String]]
parseCSV = parse csvFile "(unknown)"
-- Entry point: expects exactly one argument, the path of a CSV file.
-- Each parsed record is written to stdout as "[f1,f2,...]" on its own line.
-- On wrong usage or a parse error, a message is written to stderr and the
-- process exits with a failure status.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [path] -> do
            contents <- readFile path
            case parseCSV contents of
                Left err -> do
                    hPutStrLn stderr "Error parsing input:"
                    hPrint stderr err
                    exitFailure
                Right rows -> mapM_ printRow rows
        _ -> do
            hPutStrLn stderr "usage: <command> <csv filename>"
            exitFailure
  where
    -- 'print' would show non-ASCII characters as numeric escapes, so the
    -- record is rendered by hand instead.
    printRow fields = putStrLn ("[" ++ joinWithComma fields ++ "]")

    joinWithComma [] = ""
    joinWithComma [x] = x
    joinWithComma (x : xs) = x ++ "," ++ joinWithComma xs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment