-
-
Save erikvanoosten/f5b9d500871aea565445 to your computer and use it in GitHub Desktop.
Parboiled2 CSV parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* based on comments in https://github.com/sirthias/parboiled2/issues/61 */
/**
 * CSV parser built on the parboiled2 rule DSL.
 *
 * A file is a sequence of rows separated by line breaks; a row is one or more
 * fields separated by `delimeter`; a field is either double-quoted ("escaped")
 * or plain ("non-escaped"). Inside a quoted field the delimiter, line breaks
 * and doubled quotes (`""`, yielding one `"`) are allowed.
 *
 * Note that [[file]] does not require end-of-input, so it succeeds on the
 * longest prefix it can parse rather than failing on trailing garbage.
 *
 * @param input     the text to parse
 * @param delimeter the field separator (spelling kept for source compatibility);
 *                  may be longer than one character
 */
case class Parboiled2CsvParser(input: ParserInput, delimeter: String) extends Parser {

  /** The quote character that encloses escaped fields. */
  def DQUOTE: Char = '"'

  /** Matches the field delimiter and pushes it, for use inside quoted fields. */
  def DELIMITER_TOKEN: Rule1[String] = rule(capture(delimeter))

  /** Matches a doubled quote inside a quoted field and pushes a single quote. */
  def DQUOTE2: Rule1[String] = rule("\"\"" ~ push("\""))

  /**
   * Matches one line break without pushing anything.
   *
   * "\r\n" is tried first so that Windows line endings are consumed as a single
   * break; "\n\r" and "\n" are still accepted so that previously parsable input
   * keeps parsing the same way.
   */
  def NON_CAPTURING_CRLF: Rule0 = rule("\r\n" | "\n\r" | "\n")

  /** Matches one line break and pushes the matched text (used inside quoted fields). */
  def CRLF: Rule1[String] = rule(capture(NON_CAPTURING_CRLF))

  /**
   * The delimiter followed by CR, LF and the quote character.
   *
   * Retained as a public member for compatibility; [[TXT]] no longer feeds it
   * to `anyOf`, because that would ban every single character of a
   * multi-character delimiter from field text.
   */
  val delims: String = s"$delimeter\r\n$DQUOTE"

  /**
   * Matches and pushes one character of plain field text: any character that
   * neither starts the delimiter sequence nor is CR, LF or a double quote.
   */
  def TXT: Rule1[String] = rule(capture(!(str(delimeter) | anyOf("\r\n\"")) ~ ANY))

  /** Blank characters tolerated around a quoted field. */
  val WHITESPACE = CharPredicate(" \t")

  /** One or more blanks. */
  def SPACES: Rule0 = rule(oneOrMore(WHITESPACE))

  /**
   * A quoted field, optionally surrounded by blanks. The pushed value is the
   * content between the quotes with doubled quotes collapsed.
   */
  def escaped: Rule1[String] = rule {
    optional(SPACES) ~
      DQUOTE ~ (zeroOrMore(DELIMITER_TOKEN | TXT | CRLF | DQUOTE2) ~ DQUOTE ~
      optional(SPACES)) ~> (_.mkString(""))
  }

  /** A plain field: any run (possibly empty) of text characters and stray quotes. */
  def nonEscaped: Rule1[String] = rule {
    zeroOrMore(TXT | capture(DQUOTE)) ~> (_.mkString(""))
  }

  /** A single field; the quoted form is tried before the plain form. */
  def field: Rule1[String] = rule(escaped | nonEscaped)

  /** One row: one or more fields separated by the delimiter. */
  def row: Rule1[Seq[String]] = rule(oneOrMore(field).separatedBy(delimeter))

  /** The whole document: zero or more rows separated by line breaks. */
  def file: Rule1[Seq[Seq[String]]] = rule(zeroOrMore(row).separatedBy(NON_CAPTURING_CRLF))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment