Last active
August 29, 2015 14:24
-
-
Save lildata/0d06bf9247219600ce51 to your computer and use it in GitHub Desktop.
A parser-combinator-based CSV parser (RFC 4180, http://tools.ietf.org/html/rfc4180), as demonstrated here: http://stackoverflow.com/questions/5063022/use-scala-parser-combinator-to-parse-csv-files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * CSV parser built from parser combinators, modelled on the RFC 4180 grammar.
 *
 * Requires `RegexParsers` (from scala-parser-combinators) to be in scope.
 *
 * Grammar overview:
 *  - a file is a sequence of records separated by line breaks (`\r\n` or `\n`),
 *    optionally terminated by one final line break;
 *  - a record is a comma-separated list of fields;
 *  - a field is either quoted (may contain commas, line breaks and doubled
 *    quotes) or unquoted (any run of characters other than `"`, `,`, CR, LF).
 */
object CSV extends RegexParsers {
  // Spaces are significant inside CSV fields, so never skip them implicitly.
  override val skipWhitespace = false

  def COMMA = ","
  def DQUOTE = "\""

  /** A doubled quote inside a quoted field; yields a single literal quote. */
  def DQUOTE2 = "\"\"" ^^^ "\""

  /** Record terminator: accepts both `\r\n` and a bare `\n`. */
  def CRLF = "\r\n" | "\n"

  /** Exactly one character that is legal in any field position. */
  def TXT = "[^\",\r\n]".r

  /** Run of blanks/tabs tolerated around a quoted field. */
  def SPACES = "[ \t]+".r

  /**
   * Whole input: records separated by line breaks.
   *
   * A line break only acts as a record separator when at least one more
   * character follows it. Because `record` also matches the empty string,
   * treating the final line break as a separator would append a spurious
   * `List("")` record for every input that ends with a newline; instead the
   * final line break is consumed by the trailing `opt(CRLF)`.
   */
  def file: Parser[List[List[String]]] =
    repsep(record, CRLF <~ guard("(?s).".r)) <~ opt(CRLF)

  /** One line: fields separated by commas (an empty line is one empty field). */
  def record: Parser[List[String]] = repsep(field, COMMA)

  /** A field; the quoted form is tried first, then the unquoted form. */
  def field: Parser[String] = escaped | nonescaped

  /**
   * Quoted field. Blanks outside the quotes are discarded; inside the quotes
   * commas, line breaks and doubled quotes are kept as content.
   */
  def escaped: Parser[String] =
    (opt(SPACES) ~> DQUOTE ~> rep(TXT | COMMA | CRLF | DQUOTE2) <~ DQUOTE <~ opt(SPACES)) ^^ (_.mkString)

  /** Unquoted field: zero or more plain characters, so it may be empty. */
  def nonescaped: Parser[String] = rep(TXT) ^^ (_.mkString)

  /**
   * Parses a complete CSV document.
   *
   * @param s the CSV text
   * @return one list of field values per record
   * @throws Exception if the input cannot be parsed in full; the message is
   *                   the string form of the parse failure
   */
  def parse(s: String): List[List[String]] = parseAll(file, s) match {
    case Success(rows, _) => rows
    case failure          => throw new Exception(failure.toString)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment