Created
September 28, 2015 10:17
-
-
Save softwarespot/063d4c58ad4e1a8b9244 to your computer and use it in GitHub Desktop.
Simple CSV parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The following kata goes back to basics by creating a parser. Nothing fancy here, just good ol' best practices
/**
 * CSV parser. Takes a string as input and returns an array of arrays (one per row).
 *
 * Parsing is lenient and loosely follows RFC 4180:
 *  - a field may be wrapped in quote characters, in which case separators and
 *    line breaks inside it are kept literally;
 *  - inside a quoted field, two consecutive quote characters produce one literal quote;
 *  - rows end at "\n" or "\r\n"; a single trailing line break does not create an extra row;
 *  - an unterminated quoted field runs to the end of the input.
 *
 * @param {string} string CSV input string
 * @param {string} separator Single character used to separate fields. Default is ,
 * @param {string} quote Single character used to quote fields. Default is "
 * @return {array} A 2-dimensional array with one array of column strings per row.
 *                 An empty input string yields a single row with one empty column: [['']]
 */
function parseCSV(string, separator, quote) {
    // An empty input is reported as one row holding one empty column
    if (string.length === 0) {
        return [
            ['']
        ];
    }

    // `||` (not default parameters) so that null and '' also fall back to the
    // defaults; an empty separator or quote character would be meaningless
    const fieldSeparator = separator || ',';
    const quoteChar = quote || '"';

    // Completed rows
    const rows = [];

    // Columns of the row currently being parsed; replaced (not cleared) per row
    // because the previous array is already referenced from `rows`
    let columns = [];

    // Characters of the field currently being parsed
    let field = [];

    // True while between an opening and a closing quote
    let inQuote = false;

    // True once the current field has seen a quote, so that a quoted empty
    // field ("") at the very end of the input is still emitted
    let sawQuote = false;

    // Return the current field as a string and reset the per-field state
    const takeField = () => {
        const value = field.join('');
        field = [];
        sawQuote = false;
        return value;
    };

    for (let i = 0; i < string.length; i++) {
        const token = string[i];

        if (inQuote) {
            if (token !== quoteChar) {
                // Separators and line breaks are literal inside quotes
                field.push(token);
            } else if (string[i + 1] === quoteChar) {
                // A doubled quote is an escaped quote: keep one, skip the other
                field.push(quoteChar);
                i++;
            } else {
                // A lone quote closes the quoted section
                inQuote = false;
            }
            continue;
        }

        if (token === quoteChar) {
            inQuote = true;
            sawQuote = true;
            continue;
        }

        if (token === fieldSeparator || token === '\n') {
            columns.push(takeField());

            // A line feed also ends the row
            if (token === '\n') {
                rows.push(columns);
                columns = [];
            }
            continue;
        }

        // Drop the CR of a CRLF pair; the following LF ends the row
        if (token === '\r' && string[i + 1] === '\n') {
            continue;
        }

        field.push(token);
    }

    // Flush the last row when the input did not end with a line break. A
    // non-empty `columns` means a separator was seen, so a (possibly empty)
    // final field is still owed, e.g. 'a,b,' has three columns.
    if (columns.length !== 0 || field.length !== 0 || sawQuote) {
        columns.push(takeField());
        rows.push(columns);
    }

    return rows;
}
// Example: print the parsed rows for a one-line input containing a quoted field
console.log(parseCSV('1,2,3,"Some random string",100'));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment