Skip to content

Instantly share code, notes, and snippets.

@tototoshi
Created August 25, 2012 14:48
Show Gist options
  • Save tototoshi/3466658 to your computer and use it in GitHub Desktop.
Save tototoshi/3466658 to your computer and use it in GitHub Desktop.
CSV parser in Scala
/** Syntax enrichment: lets an `Iterator[String]` of text lines be read as CSV records. */
object CSVImplicits {

  /**
   * Wrapper that adds `asCSV` to an `Iterator[String]`.
   *
   * Declared as an implicit class, so the compiler also generates the implicit
   * conversion method of the same name (`toCSVIterator`).
   *
   * @param it the line iterator being enriched
   */
  implicit class toCSVIterator(it: Iterator[String]) {

    /** Wraps the underlying line iterator in a `CSVIterator`. */
    def asCSV: CSVIterator = new CSVIterator(it)
  }
}
/**
 * Iterates over CSV records read from an iterator of text lines.
 *
 * Fields are separated by `,`. A field may be enclosed in double quotes; inside
 * a quoted field a comma is literal, a doubled quote (`""`) stands for one quote
 * character, and the field may continue onto the following line(s) of the
 * underlying iterator, which are joined with `"\n"`.
 *
 * A quote character met outside a quoted section simply starts a quoted section,
 * even in the middle of a field.
 *
 * @param it source of lines, one element per line without its line terminator
 */
class CSVIterator(it: Iterator[String]) extends Iterator[List[String]] {

  /** The quote character that delimits quoted fields. */
  val DOUBLE_QUOTATION: Char = '"'

  /**
   * True while the underlying line iterator has more lines.
   *
   * Kept with an empty parameter list so existing callers that write
   * `hasNext()` continue to compile.
   */
  def hasNext(): Boolean = it.hasNext

  /**
   * Reads the next record, consuming more than one line when a quoted field
   * spans a line break.
   *
   * If the input ends while a quoted field is still open, `next()` is called on
   * the exhausted underlying iterator and whatever it throws is propagated.
   *
   * @return the fields of the record, in order; an empty line yields `List("")`
   */
  def next(): List[String] =
    parseLine(line = it.next(), pos = 0, inQuotation = false, field = new StringBuilder, fields = Nil)

  /**
   * Character-by-character state machine over `line`, starting at `pos`.
   *
   * @param line        the line currently being scanned
   * @param pos         index of the next character to examine
   * @param inQuotation whether the scanner is currently inside a quoted section
   * @param field       characters collected so far for the current field
   * @param fields      completed fields, most recent first
   * @return the completed record in source order
   */
  @scala.annotation.tailrec
  private def parseLine(
      line: String,
      pos: Int,
      inQuotation: Boolean,
      field: StringBuilder,
      fields: List[String]): List[String] = {
    if (pos >= line.length) {
      if (inQuotation) {
        // The quoted field continues on the next line; keep the line break.
        parseLine(it.next(), 0, inQuotation, field.append('\n'), fields)
      } else {
        // Fields were prepended, so restore source order once at the end.
        (field.result() :: fields).reverse
      }
    } else {
      val c = line.charAt(pos)
      if (inQuotation) {
        if (c != DOUBLE_QUOTATION) {
          // Everything except a quote (commas included) is literal inside quotes.
          parseLine(line, pos + 1, inQuotation, field.append(c), fields)
        } else if (pos + 1 < line.length && line.charAt(pos + 1) == DOUBLE_QUOTATION) {
          // A doubled quote inside a quoted section is one literal quote.
          parseLine(line, pos + 2, inQuotation, field.append(DOUBLE_QUOTATION), fields)
        } else {
          // A lone quote closes the quoted section.
          parseLine(line, pos + 1, false, field, fields)
        }
      } else if (c == DOUBLE_QUOTATION) {
        // Opening quote: enter the quoted section (so `""` is an empty quoted field).
        parseLine(line, pos + 1, true, field, fields)
      } else if (c == ',') {
        // Unquoted comma ends the current field.
        parseLine(line, pos + 1, inQuotation, new StringBuilder, field.result() :: fields)
      } else {
        parseLine(line, pos + 1, inQuotation, field.append(c), fields)
      }
    }
  }
}
/** Demo entry point: parses a small in-memory CSV sample and prints one record per line. */
object Main extends App {
  import CSVImplicits._

  // Sample input. The continuation lines are kept flush-left; `stripMargin`
  // removes each leading '|'. One record contains a quoted field that spans
  // two lines.
  val data: String = """|a,b,c,d
|a,b,,c
|a,b,"",c
|a,b,c,"de"
|a,b,c,"d""e"
|a,b,c,"d
|e"
|a,b,"c,c",d
|""".stripMargin

  // View the sample's lines as CSV records via the `asCSV` enrichment.
  val records = scala.io.Source.fromString(data).getLines().asCSV
  for (record <- records) println(record)

  /* Expected output:
   *
   * List(a, b, c, d)
   * List(a, b, , c)
   * List(a, b, , c)
   * List(a, b, c, de)
   * List(a, b, c, d"e)
   * List(a, b, c, d
   * e)
   * List(a, b, c,c, d)
   */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment