Created
July 13, 2019 18:04
-
-
Save mukeshthawani/ddbcbd7b0ff75dd2ccc59aab877c555c to your computer and use it in GitHub Desktop.
A simple CSV parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses a CSV string into a two-dimensional array of fields.
///
/// The outer array holds one element per non-empty row and each inner
/// array holds that row's fields in order. Enclosure characters (double
/// quotes) are kept in the returned fields; a delimiter or newline that
/// appears between them does not start a new field or row.
///
/// Blank lines are skipped, so no row in the result is empty.
///
/// - Parameters:
///   - string: The CSV text to parse.
///   - delimiter: The field separator. Defaults to a comma; pass another
///     character such as a semicolon to change it.
/// - Returns: The parsed rows, or an empty array when `string` has no rows.
func parse(string: String, delimiter: Character = ",") -> [[String]] {
    // Typed as `String` on purpose: this makes the call below resolve to the
    // quote-aware `split(separator:enclosure:)` instead of the
    // Character-based standard-library `split`, which would break a row on
    // a newline inside quotes.
    let rowSeparator: String = "\n"
    let fieldSeparator = String(delimiter)

    return string
        .split(separator: rowSeparator)
        // A blank line would otherwise become an empty `[]` row.
        .filter { !$0.isEmpty }
        .map { row in
            row.split(separator: fieldSeparator).map { String($0) }
        }
}
extension String {
    /// Splits the string into fields around `separator`, ignoring any
    /// separator that appears between a pair of `enclosure` characters.
    ///
    /// Enclosure characters are not stripped from the returned fields.
    /// Empty fields are preserved, including the one that follows a
    /// trailing separator (`"a,b,"` yields `["a", "b", ""]`).
    ///
    /// - Parameters:
    ///   - separator: The text that delimits fields. An empty separator
    ///     never matches, so the whole string is returned as one field.
    ///   - enclosure: The quoting character. Each occurrence toggles the
    ///     "inside quotes" state, so a doubled enclosure (`""`) inside a
    ///     quoted field leaves the state unchanged.
    /// - Returns: The fields in order, or an empty array when the string
    ///   itself is empty.
    func split(separator: String, enclosure: Character = "\"") -> [String] {
        // An empty string has no fields at all.
        guard !isEmpty else { return [] }
        // An empty separator can never match.
        guard !separator.isEmpty else { return [self] }

        let separatorLength = separator.count
        var values: [String] = []
        // Start of the field currently being scanned.
        var fieldStart = startIndex
        var cursor = startIndex
        var isInsideEnclosure = false

        // Walk the string once by index; this avoids re-deriving the
        // position from `startIndex` on every step.
        while cursor < endIndex {
            if !isInsideEnclosure && self[cursor...].starts(with: separator) {
                values.append(String(self[fieldStart..<cursor]))
                // Safe: the match above guarantees that `separatorLength`
                // characters are available from `cursor`.
                cursor = index(cursor, offsetBy: separatorLength)
                fieldStart = cursor
            } else {
                if self[cursor] == enclosure {
                    isInsideEnclosure.toggle()
                }
                cursor = index(after: cursor)
            }
        }

        // Whatever follows the last separator (possibly nothing) is the
        // final field.
        values.append(String(self[fieldStart..<endIndex]))
        return values
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Unit tests for `parse(string:delimiter:)`.
class CSVParserTests: XCTestCase {

    /// A single row is split into its comma-separated fields.
    func testSingleLineString() {
        let input = "john,UK,2000"
        let expected = [["john", "UK", "2000"]]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// Each line of the input becomes its own row.
    func testMultiLineString() {
        let input = """
        john,UK,2000
        jack,UK,2005
        """
        let expected = [
            ["john", "UK", "2000"],
            ["jack", "UK", "2005"],
        ]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// A delimiter inside double quotes does not split the field, and the
    /// quotes are kept in the result.
    func testSingleLineStringWithDoubleQuotes() {
        let input = "\"john, last\",UK,2000"
        let expected = [["\"john, last\"", "UK", "2000"]]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// A newline inside double quotes does not start a new row.
    func testMultiLineStringWithDoubleQuotes() {
        // The single leading space before `obama` is part of the input; the
        // expected field below contains "\n obama".
        let input = """
        john,UK,2000
        jack,UK,2005
        "natasha
         obama",UK,2009
        """
        let expected = [
            ["john", "UK", "2000"],
            ["jack", "UK", "2005"],
            ["\"natasha\n obama\"", "UK", "2009"],
        ]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// A doubled quote inside a quoted field is preserved as-is.
    func testStringWithDoubleQuoteInsideDoubleQuotes() {
        let input = "\"aaa\",\"b\"\"bb\",\"ccc\""
        let expected = [["\"aaa\"", "\"b\"\"bb\"", "\"ccc\""]]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// Leading whitespace in a field is not trimmed.
    func testStringWithWhiteSpace() {
        let input = " aaa, bbb, ccc"
        let expected = [[" aaa", " bbb", " ccc"]]
        XCTAssertEqual(parse(string: input), expected)
    }

    /// Input without any delimiter yields one row with one field.
    func testStringWithSingleValue() {
        let expected = [["UK"]]
        XCTAssertEqual(parse(string: "UK"), expected)
    }

    /// Empty input yields no rows.
    func testStringWithEmptyValue() {
        let expected: [[String]] = []
        XCTAssertEqual(parse(string: ""), expected)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment