Last active
April 22, 2016 18:04
-
-
Save iluvcapra/f2b6f352d49f598ca65d7fa38622644c to your computer and use it in GitHub Desktop.
A Brief (< 150 LOC) Swift RFC4180 CSV Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/swift | |
import Foundation | |
/* In the style of http://www.cocoawithlove.com/2009/11/writing-parser-using-nsscanner-csv.html */
/* https://tools.ietf.org/html/rfc4180 */ | |
/// A small event-driven CSV parser modelled on RFC 4180.
///
/// The input is split into tokens with an `NSScanner` and consumed by a
/// recursive-descent parser (`file` -> `record` -> `field` -> `escaped` /
/// `unescaped`). Results are reported through three callbacks:
/// `beginRecordProc`, `fieldProc` and `endRecordProc`.
///
/// Malformed input is handled leniently because `parse()` does not throw:
/// an unterminated quoted field ends at the end of the input, and a token the
/// grammar does not expect (for example a quote in the middle of an unquoted
/// field) makes `parse()` return early.
class CSVParser {

    /// Error thrown by `expect(_:)`; `atRow` is the value of `currentRow`
    /// at the time the expectation failed.
    struct Error : ErrorType { var atRow: Int }

    /* MARK: Interface. Event-driven parser. */

    /// Called before the fields of a record are reported.
    var beginRecordProc : () -> ()

    /// Called after the last field of a record has been reported.
    var endRecordProc : () -> ()

    /// Called once for every field, with quoting already removed.
    var fieldProc : (value : String) -> Void

    /* MARK: Tokenizer and token stream */

    /// Kinds of token produced by the tokenizer.
    enum Symbol : Int {
        case Begin = 0              // placeholder before the first token is read
        case DoubleQuote = 1        // `"` opening or closing a quoted field
        case DoubleDoubleQuote = 2  // `""` inside a quoted field (an escaped quote)
        case Separator = 3          // `,`
        case LineSeparator = 4      // CRLF, LFCR, LF or CR
        case TextData = 5           // run of characters other than `,` `"` CR LF
        case End = 6                // end of input
    }

    /// One token: its kind, the text it was scanned from, and the value of
    /// `currentRow` when it was scanned.
    struct Token {
        var symbol : Symbol
        var literal : String
        var row : Int
    }

    /// Every character that may appear in a `TextData` token.
    let textDataCharSet : NSCharacterSet = {
        let delimiters = NSCharacterSet(charactersInString: ",\"\r\n")
        return delimiters.invertedSet
    }()

    var scanner : NSScanner
    var currentToken : Token = Token(symbol : .Begin, literal : "", row : 0)

    /// Number of line separators scanned so far (including those inside
    /// quoted fields).
    var currentRow : Int = 0

    /// True while the tokenizer is between an opening and a closing quote.
    /// Needed because `""` means "escaped quote" only inside a quoted field;
    /// at the start of a field it is an empty quoted field.
    private var insideQuotedField = false

    /// Replaces `currentToken` with the next token from the input, or with an
    /// `End` token once the scanner is exhausted.
    func nextToken() {
        if scanner.atEnd {
            currentToken = Token(symbol : .End, literal : "", row : currentRow)
        } else {
            currentToken = scanToken()
        }
    }

    /// Scans exactly one token. Must only be called when the scanner is not
    /// at the end of the input.
    private func scanToken() -> Token {
        // An escaped quote can only occur inside a quoted field.
        if insideQuotedField && scanner.scanString("\"\"", intoString: nil) {
            return Token(symbol : .DoubleDoubleQuote, literal : "\"\"", row : currentRow)
        }
        if scanner.scanString("\"", intoString: nil) {
            // A lone quote opens a quoted field or closes the current one.
            insideQuotedField = !insideQuotedField
            return Token(symbol : .DoubleQuote, literal : "\"", row : currentRow)
        }
        if scanner.scanString(",", intoString: nil) {
            return Token(symbol : .Separator, literal : ",", row : currentRow)
        }
        // Two-character terminators are tried first so they are not split.
        for terminator in ["\r\n", "\n\r", "\n", "\r"] {
            if scanner.scanString(terminator, intoString: nil) {
                // The token carries the row it ends; later tokens get the next row.
                let token = Token(symbol : .LineSeparator, literal : terminator, row : currentRow)
                currentRow += 1
                return token
            }
        }
        // The scan is performed outside of `assert` so that it still runs in
        // optimized builds, where assert conditions are not evaluated.
        var text : NSString? = nil
        guard scanner.scanCharactersFromSet(textDataCharSet, intoString: &text),
              let scanned = text else {
            fatalError("CSVParser: tokenizer could not make progress at row \(currentRow)")
        }
        return Token(symbol : .TextData, literal : scanned as String, row : currentRow)
    }

    /// If the current token is of kind `s`, consumes it and returns true;
    /// otherwise leaves the token stream untouched and returns false.
    func accept(s : Symbol) -> Bool {
        guard currentToken.symbol == s else { return false }
        nextToken()
        return true
    }

    /// Like `accept(_:)`, but throws `Error` when the current token is not of
    /// kind `s`.
    func expect(s : Symbol) throws {
        guard accept(s) else {
            throw Error(atRow: currentRow)
        }
    }

    /// Creates a parser over CSV text that is already in memory.
    init(string : String) {
        scanner = NSScanner(string: string)
        // Whitespace and newlines are significant in CSV; skip nothing.
        scanner.charactersToBeSkipped = nil
        beginRecordProc = {}
        endRecordProc = {}
        fieldProc = {(value : String) in return}
        nextToken()
    }

    /// Creates a parser over the UTF-8 contents of `url`.
    /// Throws whatever `String(contentsOfURL:encoding:)` throws.
    convenience init(url : NSURL) throws {
        let documentString = try String(contentsOfURL: url, encoding : NSUTF8StringEncoding)
        self.init(string: documentString)
    }

    /* parse non-terminals */

    /// Parses the remainder of a quoted field (the opening quote has already
    /// been consumed) and reports its unescaped value.
    func escaped() {
        var stringData = ""
        while !accept(.DoubleQuote) {
            // Unterminated quoted field: stop instead of spinning on `End`.
            if currentToken.symbol == .End { break }
            let literal = currentToken.literal
            if accept(.TextData) || accept(.LineSeparator) || accept(.Separator) {
                // Commas and line breaks are ordinary data inside quotes.
                stringData += literal
            } else if accept(.DoubleDoubleQuote) {
                stringData += "\""
            } else {
                // No other token kind is produced here; bail out rather than loop.
                break
            }
        }
        fieldProc(value : stringData)
    }

    /// Parses an unquoted field; a missing `TextData` token is reported as an
    /// empty field.
    func unescaped() {
        let stringData = currentToken.literal
        if accept(.TextData) {
            fieldProc(value : stringData)
        } else {
            fieldProc(value : "")
        }
    }

    /// Parses one field, quoted or unquoted.
    func field() {
        if accept(.DoubleQuote) {
            escaped()
        } else {
            unescaped()
        }
    }

    /// Parses one record: one or more fields separated by commas, bracketed
    /// by `beginRecordProc` and `endRecordProc`.
    func record() {
        beginRecordProc()
        repeat {
            field()
        } while accept(.Separator)
        endRecordProc()
    }

    /// Parses records separated by line separators until the input ends.
    /// Empty input yields no records, and a line break that terminates the
    /// last record does not start another one.
    func file() {
        if currentToken.symbol == .End { return }
        repeat {
            record()
        } while accept(.LineSeparator) && currentToken.symbol != .End
    }

    /// Runs the parser over the whole input, invoking the callbacks as
    /// records and fields are recognized.
    func parse() {
        file()
    }
}
// Command-line driver: parses the CSV file named by the first argument and
// prints every record. The first record is treated as the header; fields of
// later records are printed as NAME=value using the header names.
let argv = Process.arguments
if argv.count > 1 {
    let inputURL = NSURL.fileURLWithPath(argv[1])
    do {
        let parser = try CSVParser(url: inputURL)
        var fieldIdx = 0
        var rowIdx = 0
        var colNames : [String] = []
        parser.beginRecordProc = {
            print("START RECORD \(rowIdx)")
            fieldIdx = 0
        }
        parser.endRecordProc = {
            print("END RECORD")
            rowIdx += 1
        }
        parser.fieldProc = {(value : String) in
            if rowIdx == 0 {
                colNames += [value]
                print("- READ HEADER FIELD=\(value)")
            } else if fieldIdx < colNames.count {
                print("- \(colNames[fieldIdx])=\(value)")
            } else {
                // The row has more fields than the header; label the extra
                // field by position instead of indexing past the end.
                print("- column \(fieldIdx)=\(value)")
            }
            fieldIdx += 1
        }
        parser.parse()
    } catch {
        // Report an unreadable file instead of trapping on `try!`.
        fputs("error: could not read \(argv[1]): \(error)\n", stderr)
        exit(1)
    }
} else {
    print("usage: csv.swift FILE")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment