Skip to content

Instantly share code, notes, and snippets.

@maciej
Last active December 19, 2015 21:32
Show Gist options
  • Save maciej/f8da7d2db8b686cdd76e to your computer and use it in GitHub Desktop.
Save maciej/f8da7d2db8b686cdd76e to your computer and use it in GitHub Desktop.
Parboiled2 CSV parser
/* based on comments in https://github.com/sirthias/parboiled2/issues/61 */
/** CSV parser built on the parboiled2 rule DSL.
  *
  * Fields are either quoted ("escaped") or bare ("nonEscaped"); rows are fields
  * separated by `delimeter`, and a file is rows separated by line breaks.
  *
  * @param input     the text to parse
  * @param delimeter the field separator; must be non-empty. (The spelling is kept
  *                  as-is because it is part of the public constructor signature.)
  */
case class Parboiled2CsvParser(input: ParserInput, delimeter: String) extends Parser {
  // An empty delimiter would let DELIMITER_TOKEN succeed without consuming any
  // input, so the repetition inside `escaped` would presumably never terminate.
  require(delimeter.nonEmpty, "delimeter must not be empty")

  /** The double-quote character that opens and closes an escaped field. */
  def DQUOTE: Char = '"'

  /** The delimiter, captured so that it is preserved inside an escaped field. */
  def DELIMITER_TOKEN: Rule1[String] = rule(capture(delimeter))

  /** A doubled quote inside an escaped field; pushes a single literal quote. */
  def DQUOTE2: Rule1[String] = rule("\"\"" ~ push("\""))

  /** A line break that is matched but not pushed onto the value stack.
    *
    * "\r\n" is tried first so Windows/RFC 4180 line endings are recognised; the
    * original only accepted "\n\r" and "\n", which stopped parsing at the first
    * '\r'. Both of those are still accepted for backward compatibility.
    */
  def NON_CAPTURING_CRLF: Rule0 = rule("\r\n" | "\n\r" | "\n")

  /** A line break captured verbatim (used for line breaks inside escaped fields). */
  def CRLF: Rule1[String] = rule(capture(NON_CAPTURING_CRLF))

  // Every character that terminates plain text: each delimiter character,
  // carriage return, line feed and the double quote.
  val delims = s"$delimeter\r\n" + DQUOTE

  /** A single captured character that is not one of `delims`. */
  def TXT: Rule1[String] = rule(capture(!anyOf(delims) ~ ANY))

  val WHITESPACE = CharPredicate(" \t")

  /** One or more spaces or tabs. */
  def SPACES: Rule0 = rule(oneOrMore(WHITESPACE))

  /** A quoted field, optionally padded by spaces/tabs outside the quotes.
    *
    * Inside the quotes the delimiter, line breaks and doubled quotes are all
    * allowed; the captured pieces are concatenated into the field value.
    */
  def escaped: Rule1[String] = rule(
    optional(SPACES) ~
      DQUOTE ~ (zeroOrMore(DELIMITER_TOKEN | TXT | CRLF | DQUOTE2) ~ DQUOTE ~
      optional(SPACES)) ~> (_.mkString(""))
  )

  /** A bare field: any run of plain text and stray quotes; may be empty. */
  def nonEscaped: Rule1[String] =
    rule(zeroOrMore(TXT | capture(DQUOTE)) ~> (_.mkString("")))

  /** A field; the quoted form is tried first, then the bare form. */
  def field: Rule1[String] = rule(escaped | nonEscaped)

  /** One row: one or more fields separated by the delimiter. */
  def row: Rule1[Seq[String]] = rule(oneOrMore(field).separatedBy(delimeter))

  /** The whole document: rows separated by line breaks.
    *
    * NOTE: this rule is not anchored with EOI, so it succeeds with the rows
    * parsed so far when it reaches input it cannot match.
    */
  def file: Rule1[Seq[Seq[String]]] = rule(zeroOrMore(row).separatedBy(NON_CAPTURING_CRLF))
}
@mquraishi
Copy link

def nonEscaped = rule(zeroOrMore(TXT | capture(DQUOTE)) -> (_.mkString("")))
Should it not be:
def nonEscaped = rule(zeroOrMore(TXT | capture(DQUOTE)) ~> (_.mkString("")))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment