Skip to content

Instantly share code, notes, and snippets.

@softwarespot
Created September 28, 2015 10:17
Show Gist options
  • Save softwarespot/063d4c58ad4e1a8b9244 to your computer and use it in GitHub Desktop.
Save softwarespot/063d4c58ad4e1a8b9244 to your computer and use it in GitHub Desktop.
Simple CSV parser
// The following kata goes back to basics by creating a parser. Nothing fancy here, just good ol' best practices.
/**
 * CSV parser. Takes a string as input and returns an array of arrays (one per row).
 *
 * Parsing rules:
 * - Fields are split on `separator`; rows are split on "\n". A "\r" that directly
 *   precedes a "\n" outside of quotes is dropped, so CRLF input works too.
 * - A field may be wrapped in `quote` characters. Inside a quoted field, separators
 *   and line breaks are literal, and a doubled quote stands for one quote character.
 * - A quoted field is closed by a quote that is followed by a separator, a line
 *   break or the end of the input. Any other lone quote inside a quoted field is
 *   kept literally (lenient handling of malformed input).
 * - A single trailing line break does not produce an extra empty row.
 *
 * @param {string} string CSV input string
 * @param {string} [separator=','] Single character used to separate fields
 * @param {string} [quote='"'] Single character used to quote fields
 * @return {Array<Array<string>>} A 2-dimensional array with one array of column
 *     strings per row. An empty input string yields `[['']]`.
 */
function parseCSV(string, separator, quote) {
    if (string.length === 0) {
        return [
            [''],
        ];
    }

    // Falsy arguments (undefined, null, '') fall back to the defaults
    const fieldSeparator = separator || ',';
    const textQuote = quote || '"';
    const length = string.length;

    // True if the char at `index` ends a field: end of input, separator, LF or CRLF
    const isFieldBoundary = (index) => {
        if (index >= length) {
            return true;
        }

        const next = string[index];
        return next === fieldSeparator ||
            next === '\n' ||
            (next === '\r' && string[index + 1] === '\n');
    };

    const rows = [];

    // Columns of the row currently being parsed
    let columns = [];

    // Text of the field currently being parsed
    let field = '';

    // Whether the parser is currently inside a quoted field
    let inQuote = false;

    // Whether the current row has anything to flush at the end of the input.
    // Needed so that a trailing empty field ('a,b,' or 'a,""') is not lost,
    // while a trailing line break does not create an extra row
    let rowHasContent = false;

    for (let i = 0; i < length; i++) {
        const token = string[i];

        if (inQuote) {
            if (token !== textQuote) {
                field += token;
            } else if (string[i + 1] === textQuote) {
                // Doubled quote: emit one quote and skip the second one
                field += textQuote;
                i++;
            } else if (isFieldBoundary(i + 1)) {
                // Closing quote
                inQuote = false;
            } else {
                // Stray quote in the middle of a quoted field: keep it as-is
                field += token;
            }

            continue;
        }

        const isEOL = token === '\n';
        if (token === fieldSeparator || isEOL) {
            columns.push(field);
            field = '';

            // A separator promises another field on this row, an EOL does not
            rowHasContent = !isEOL;

            if (isEOL) {
                rows.push(columns);

                // Start a fresh array; the pushed one now belongs to `rows`
                columns = [];
            }
        } else if (token === textQuote) {
            inQuote = true;
            rowHasContent = true;
        } else if (token === '\r' && string[i + 1] === '\n') {
            // First half of a CRLF line break; the LF ends the row on the next pass
            continue;
        } else {
            field += token;
            rowHasContent = true;
        }
    }

    // Flush the last row when the input does not end with a line break
    if (rowHasContent) {
        columns.push(field);
        rows.push(columns);
    }

    return rows;
}
// Example usage: parse one row that contains a quoted field and print the result.
// Expected rows: [['1', '2', '3', 'Some random string', '100']]
{
    const sampleInput = '1,2,3,"Some random string",100';
    console.log(parseCSV(sampleInput));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment