Skip to content

Instantly share code, notes, and snippets.

@mukeshthawani
Created July 13, 2019 18:04
Show Gist options
  • Save mukeshthawani/ddbcbd7b0ff75dd2ccc59aab877c555c to your computer and use it in GitHub Desktop.
Save mukeshthawani/ddbcbd7b0ff75dd2ccc59aab877c555c to your computer and use it in GitHub Desktop.
A simple CSV parser
/// Parses a CSV string and returns its rows as a 2D array of fields.
///
/// The outer array holds one element per row and each inner array holds
/// that row's fields, in order. Rows are separated by `"\n"` and fields by
/// `delimiter`. Separators that appear inside double quotes do not split,
/// and the quote characters are kept in the returned fields.
///
/// A single `"\n"` at the very end of the input is treated as the
/// terminator of the last row: it is neither kept in the last field nor
/// reported as an additional row.
///
/// - Parameters:
///   - string: The CSV text to parse.
///   - delimiter: The field separator. Defaults to a comma; pass another
///     character (for example `";"`) for other dialects.
/// - Returns: The parsed rows. An empty input produces an empty array.
func parse(string: String, delimiter: Character = ",") -> [[String]] {
    // Files commonly end with a line break. Drop that one terminator up front
    // so it cannot leak into the last field of the last row.
    let text = string.last == "\n" ? String(string.dropLast()) : string

    let rows = text.split(separator: "\n")
    return rows.map { row in
        row
            .split(separator: String(delimiter))
            .map { String($0) }
    }
}
extension String {
    /// Splits the string into values using a separator.
    ///
    /// A separator that appears between a pair of `enclosure` characters is
    /// treated as part of the value and does not split. Enclosure characters
    /// are kept in the returned values.
    ///
    /// - Parameters:
    ///   - separator: The text that delimits values. It may be longer than one
    ///     character. An empty separator never matches, so the whole string is
    ///     returned as a single value.
    ///   - enclosure: The character that opens and closes a region in which
    ///     separators are ignored. Defaults to the double quote.
    /// - Returns: The values in order of appearance. An empty string produces
    ///   an empty array. Otherwise there is one value more than the number of
    ///   separators found outside enclosures, so a leading or trailing
    ///   separator produces an empty value.
    func split(separator: String, enclosure: Character = "\"") -> [String] {
        // An empty receiver has no values at all (not one empty value).
        guard !isEmpty else { return [] }
        // Nothing can be delimited by an empty separator.
        guard !separator.isEmpty else { return [self] }

        let separatorLength = separator.count
        var values: [String] = []
        // Start of the value currently being collected.
        var valueStart = startIndex
        var cursor = startIndex
        var isInsideEnclosure = false

        // Walk the string by index once; `starts(with:)` returns false when
        // fewer characters than the separator remain, so no bounds math is needed.
        while cursor < endIndex {
            if !isInsideEnclosure && self[cursor...].starts(with: separator) {
                values.append(String(self[valueStart..<cursor]))
                // Jump past the whole separator so its own characters are
                // never re-scanned (prevents overlapping matches).
                cursor = index(cursor, offsetBy: separatorLength)
                valueStart = cursor
                continue
            }
            if self[cursor] == enclosure {
                isInsideEnclosure.toggle()
            }
            cursor = index(after: cursor)
        }

        // Whatever follows the last separator (possibly nothing) is the final value.
        values.append(String(self[valueStart..<endIndex]))
        return values
    }
}
/// Unit tests for `parse(string:delimiter:)`, all using the default comma delimiter.
///
/// The expected values show that quote characters are kept in the parsed
/// fields and that whitespace around fields is not trimmed.
class CSVParserTests: XCTestCase {
/// A single unquoted line is split into one row of three fields.
func testSingleLineString() {
let sampleText = "john,UK,2000"
XCTAssertEqual(
parse(string: sampleText),
[["john", "UK", "2000"]])
}
/// Two lines separated by a line break produce two rows.
func testMultiLineString() {
let sampleText = """
john,UK,2000
jack,UK,2005
"""
XCTAssertEqual(
parse(string: sampleText),
[["john", "UK", "2000"],["jack", "UK", "2005"]])
}
/// A comma inside double quotes does not split the field; the quotes stay in the value.
func testSingleLineStringWithDoubleQuotes() {
let sampleText = "\"john, last\",UK,2000"
XCTAssertEqual(
parse(string: sampleText),
[["\"john, last\"", "UK", "2000"]])
}
/// A line break inside double quotes does not start a new row.
///
/// NOTE(review): the expected value below contains a space after the line
/// break ("\n obama") while the literal as written here has none before
/// `obama` — presumably indentation inside the multi-line literal was lost;
/// confirm against the original file.
func testMultiLineStringWithDoubleQuotes() {
let sampleText = """
john,UK,2000
jack,UK,2005
"natasha
obama",UK,2009
"""
XCTAssertEqual(parse(string: sampleText), [
["john", "UK", "2000"],
["jack", "UK", "2005"],
["\"natasha\n obama\"","UK","2009"]])
}
/// A doubled double quote inside a quoted field is returned as written (it is not unescaped).
func testStringWithDoubleQuoteInsideDoubleQuotes() {
let sampleText = "\"aaa\",\"b\"\"bb\",\"ccc\""
XCTAssertEqual(
parse(string: sampleText),
[["\"aaa\"", "\"b\"\"bb\"", "\"ccc\""]])
}
/// Leading whitespace in a field is preserved.
func testStringWithWhiteSpace() {
let sampleText = " aaa, bbb, ccc"
XCTAssertEqual(
parse(string: sampleText),
[[" aaa", " bbb", " ccc"]])
}
/// Input without any delimiter yields one row with one field.
func testStringWithSingleValue() {
XCTAssertEqual(parse(string: "UK"), [["UK"]])
}
/// Empty input yields no rows.
func testStringWithEmptyValue() {
XCTAssertEqual(parse(string: ""), [])
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment