Skip to content

Instantly share code, notes, and snippets.

@iluvcapra
Last active April 22, 2016 18:04
Show Gist options
  • Save iluvcapra/f2b6f352d49f598ca65d7fa38622644c to your computer and use it in GitHub Desktop.
Save iluvcapra/f2b6f352d49f598ca65d7fa38622644c to your computer and use it in GitHub Desktop.
A Brief (< 150 LOC) Swift RFC4180 CSV Parser
#!/usr/bin/swift
import Foundation
/* In the style of http://www.cocoawithlove.com/2009/11/writing-parser-using-nsscanner-csv.html */
/* https://tools.ietf.org/html/rfc4180 */
/// An event-driven, recursive-descent parser for RFC 4180 comma-separated values.
///
/// The whole document is loaded into an `NSScanner`; `parse()` then walks it and
/// reports what it finds through the three callback properties below.
/// Parsing is lenient: it never throws, and it stops at the first token that
/// does not fit the grammar (for example text directly after a closing quote).
class CSVParser {

    /// Error carrying the row at which an `expect(_:)` call failed.
    struct Error : ErrorType { var atRow: Int }

    /* MARK: Interface. Event-driven parser. */

    /// Called before the fields of a record are reported.
    var beginRecordProc : () -> ()

    /// Called after the last field of a record has been reported.
    var endRecordProc : () -> ()

    /// Called once per field with the field's unquoted, unescaped value.
    var fieldProc : (value : String) -> Void

    /* MARK: Tokenizer and token stream */

    enum Symbol : Int {
        case Begin = 0
        case DoubleQuote = 1
        // Kept so existing references still compile. The tokenizer no longer
        // emits it: whether `""` is an escaped quote or an empty quoted field
        // depends on context, so `escaped()` pairs up single quotes instead.
        case DoubleDoubleQuote = 2
        case Separator = 3
        case LineSeparator = 4
        case TextData = 5
        case End = 6
    }

    struct Token {
        var symbol : Symbol
        var literal : String
        var row : Int
    }

    /// Every character that is not a comma, double quote, CR or LF.
    let textDataCharSet : NSCharacterSet = {
        let w = NSCharacterSet(charactersInString:",\"\r\n")
        return w.invertedSet
    }()

    var scanner : NSScanner
    var currentToken : Token = Token(symbol : .Begin, literal : "", row : 0)
    var currentRow : Int = 0

    /// Scans the next token from `scanner` into `currentToken`.
    ///
    /// `currentRow` is incremented after every line separator, including those
    /// inside quoted fields; the separator token itself carries the old row.
    func nextToken() {
        guard !scanner.atEnd else {
            currentToken = Token(symbol : .End, literal : "", row : currentRow)
            return
        }

        var holding : NSString? = nil

        if scanner.scanString("\"", intoString: nil) {
            currentToken = Token(symbol : .DoubleQuote, literal : "\"", row : currentRow)
        } else if scanner.scanString(",", intoString: nil) {
            currentToken = Token(symbol : .Separator, literal : ",", row : currentRow)
        } else if scanner.scanString("\r\n", intoString: &holding) ||
            scanner.scanString("\n\r", intoString: &holding) ||
            scanner.scanString("\n", intoString: &holding) ||
            scanner.scanString("\r", intoString: &holding) {
            var lineBreak = "\n"
            if let matched = holding { lineBreak = String(matched) }
            currentToken = Token(symbol : .LineSeparator, literal : lineBreak, row : currentRow)
            currentRow += 1
        } else if scanner.scanCharactersFromSet(textDataCharSet, intoString: &holding) {
            // The scan is deliberately NOT wrapped in assert(): assert
            // conditions are skipped in optimised builds, which would leave
            // the scanner stuck at the same location forever.
            var text = ""
            if let matched = holding { text = String(matched) }
            currentToken = Token(symbol : .TextData, literal : text, row : currentRow)
        } else {
            // Defensive fallback, expected to be unreachable: nothing matched,
            // so consume one UTF-16 unit as text to guarantee forward progress.
            let source = scanner.string as NSString
            let stray = source.substringWithRange(NSMakeRange(scanner.scanLocation, 1))
            scanner.scanLocation += 1
            currentToken = Token(symbol : .TextData, literal : stray, row : currentRow)
        }
    }

    /// Consumes the current token if it has symbol `s`.
    /// - Returns: `true` when the token matched and the stream advanced.
    func accept(s : Symbol) -> Bool {
        if currentToken.symbol == s {
            nextToken()
            return true
        }
        return false
    }

    /// Like `accept(_:)`, but a mismatch is an error.
    /// - Throws: `Error` carrying `currentRow` when the current token is not `s`.
    func expect(s : Symbol) throws {
        if !accept(s) {
            throw Error(atRow: currentRow)
        }
    }

    /// Loads the document at `url` as UTF-8 and primes the first token.
    /// - Throws: whatever `String(contentsOfURL:encoding:)` throws.
    init(url : NSURL) throws {
        let documentString = try String(contentsOfURL: url, encoding : NSUTF8StringEncoding)
        scanner = NSScanner(string: documentString)
        // Whitespace is significant in CSV, so the scanner must not skip any.
        scanner.charactersToBeSkipped = nil
        beginRecordProc = {}
        endRecordProc = {}
        fieldProc = {(value : String) in return}
        nextToken()
    }

    /* parse non-terminals */

    /// Parses the remainder of a quoted field; the opening quote has already
    /// been consumed by `field()`.
    ///
    /// A quote immediately followed by another quote contributes one literal
    /// quote; a lone quote closes the field. If the input ends before the
    /// closing quote, the text collected so far is reported rather than
    /// looping forever.
    func escaped() {
        var stringData = ""
        scanning: while true {
            let literal = currentToken.literal
            switch currentToken.symbol {
            case .End:
                break scanning
            case .DoubleQuote:
                nextToken()
                if currentToken.symbol == .DoubleQuote {
                    stringData += "\""
                    nextToken()
                } else {
                    break scanning
                }
            case .DoubleDoubleQuote:
                // Not produced by nextToken(); handled for completeness.
                stringData += "\""
                nextToken()
            case .Begin, .TextData, .Separator, .LineSeparator:
                // Inside quotes, separators and line breaks are plain data.
                stringData += literal
                nextToken()
            }
        }
        fieldProc(value : stringData)
    }

    /// Parses an unquoted field; an absent text token is reported as "".
    func unescaped() {
        let stringData = currentToken.literal
        if accept(.TextData) {
            fieldProc(value : stringData)
        } else {
            fieldProc(value : "")
        }
    }

    /// Parses one field, quoted or unquoted.
    func field() {
        if accept(.DoubleQuote) {
            escaped()
        } else {
            unescaped()
        }
    }

    /// Parses one record: one or more fields separated by commas, bracketed by
    /// `beginRecordProc` and `endRecordProc`.
    func record() {
        beginRecordProc()
        repeat {
            field()
        } while accept(.Separator)
        endRecordProc()
    }

    /// Parses every record in the document.
    ///
    /// An empty document produces no records, and a line break that ends the
    /// document does not start another record (RFC 4180 makes the final line
    /// break optional).
    func file() {
        if currentToken.symbol == .End {
            return
        }
        repeat {
            record()
        } while accept(.LineSeparator) && currentToken.symbol != .End
    }

    /// Entry point: parses the whole document, firing the callbacks.
    func parse() {
        file()
    }
}
// Command-line driver: parses the CSV file named by the first argument and
// prints each record, labelling data fields with the names read from the
// first (header) record.
let argv = Process.arguments
if argv.count > 1 {
    let inputURL = NSURL.fileURLWithPath(argv[1])
    do {
        let parser = try CSVParser(url: inputURL)
        var fieldIdx = 0
        var rowIdx = 0
        var colNames : [String] = []
        parser.beginRecordProc = {
            print("START RECORD \(rowIdx)")
            fieldIdx = 0
        }
        parser.endRecordProc = {
            print("END RECORD")
            rowIdx += 1
        }
        parser.fieldProc = {(value : String) in
            if rowIdx == 0 {
                colNames += [value]
                print("- READ HEADER FIELD=\(value)")
            } else {
                // A data row may carry more fields than the header; fall back
                // to a positional label instead of indexing out of range.
                let name = fieldIdx < colNames.count ? colNames[fieldIdx] : "FIELD \(fieldIdx)"
                print("- \(name)=\(value)")
            }
            fieldIdx += 1
        }
        parser.parse()
    } catch {
        // Report an unreadable file as an error instead of crashing on try!.
        fputs("error: cannot read \(argv[1]): \(error)\n", stderr)
        exit(1)
    }
} else {
    print("usage: csv.swift FILE")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment