Created
March 12, 2018 13:49
-
-
Save carbonrobot/50c277b62e78ca63815e01cb9783c800 to your computer and use it in GitHub Desktop.
CSV Parser with Schema Validation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const csv = require('fast-csv'); | |
const HeaderValidationError = require('./headerValidationError'); | |
/**
 * TODO: missing features
 * - Check for missing conditional data
 * - Check for missing exclusive conditional data
 * - Check for invalid characters in headers
 * - Test for non-Joi validation
 * - Joi helper should export a custom Joi instance
 * - schema/ folder, and tests for schemas
 * - FileProcessingError: determine whether it is needed
 * - Document things
 * - Logging
 */
/**
 * Parses a CSV stream and maps each data row onto the attribute schema
 * supplied at construction time.
 *
 * The first row emitted by the CSV parser is treated as the header row: it is
 * normalized and validated against the schema. Every following row becomes a
 * `{ errors, record, lineNumber }` result.
 */
class CSVStreamParser {
  /**
   * @param {Object} attrMap - Schema keyed by output attribute name. Each value
   *   is either a header name (string) or an options object with a `name` and
   *   optional `required` / `validation` members.
   */
  constructor(attrMap) {
    // Stored as [attrName, options] pairs so helpers can filter/find over them.
    this.attrMap = Object.entries(attrMap);
  }

  /**
   * Pipes `input` through the CSV parser and collects one result per data row.
   *
   * @param {Object} input - Readable stream of CSV text (it is piped into the
   *   parser and listened to for 'error').
   * @returns {Promise<Array>} Resolves with the parsed rows once the parser
   *   ends. Rejects with the first error raised by the input stream, the CSV
   *   parser, or header/row processing.
   */
  parse(input) {
    return new Promise((resolve, reject) => {
      const records = [];
      let headers;
      // Counts data rows only; the header row is not numbered.
      let lineNumber = 0;
      let failed = false;

      const fail = error => {
        failed = true;
        reject(error);
      };

      const parser = csv()
        .on('data', data => {
          // Nothing to gain from processing rows once the promise is rejected.
          if (failed) {
            return;
          }
          try {
            if (!headers) {
              headers = normalizeHeaders(data);
              assertValidHeaders(this.attrMap, headers);
            } else {
              const record = parseRecord(this.attrMap, data, headers);
              record.lineNumber = ++lineNumber;
              records.push(record);
            }
          } catch (error) {
            // Reject explicitly instead of relying on the CSV library to turn
            // an exception thrown inside a listener into an 'error' event.
            fail(error);
          }
        })
        .on('error', fail)
        .on('end', () => resolve(records));

      // pipe() does not forward source errors to the destination, so without
      // this listener a failing input would leave the promise pending forever.
      input.on('error', fail);
      input.pipe(parser);
    });
  }
}
/**
 * Validates a normalized header row against the schema.
 *
 * @param {Array} attrMap - `[attrName, options]` pairs. `options` is either a
 *   header name string or an object with `name` and an optional `required`
 *   member (a truthy value, or a function that receives the headers and
 *   returns whether the column is required).
 * @param {string[]} headers - Normalized header names.
 * @throws {HeaderValidationError} When a header is empty, or when a required
 *   header is absent. Errors thrown by a `required` function propagate as-is.
 */
function assertValidHeaders(attrMap, headers) {
  // check for null/empty headers
  if (headers.includes('')) {
    throw new HeaderValidationError('Null headers are not allowed');
  }

  // check for missing headers
  const missing = attrMap
    .filter(([, options]) => {
      // typeof null is 'object', so null entries must be excluded explicitly.
      if (options === null || typeof options !== 'object' || !options.required) {
        return false;
      }
      const required = typeof options.required === 'function'
        ? options.required(headers)
        : options.required;
      return Boolean(required) && !headers.includes(options.name);
    })
    .map(([, options]) => options.name);

  if (missing.length > 0) {
    const missingHeaders = missing.map(name => name.toLowerCase()).join();
    throw new HeaderValidationError(`Missing required headers: ${missingHeaders}`);
  }
}
/**
 * Finds the schema entry whose header name equals `headerKey`.
 *
 * @param {Array} attrMap - `[attrName, options]` pairs; `options` is either a
 *   header name string or an object carrying the header name in `name`.
 * @param {string} headerKey - Header name to look up.
 * @returns {Array|undefined} The matching `[attrName, options]` pair, or
 *   `undefined` when no entry matches.
 */
function getAttrByKeyName(attrMap, headerKey) {
  return attrMap.find(([, options]) => {
    // typeof null is 'object'; guard so a null entry is skipped, not dereferenced.
    const columnName = options !== null && typeof options === 'object'
      ? options.name
      : options;
    return columnName === headerKey;
  });
}
/**
 * Produces the canonical form of each raw header cell: surrounding whitespace
 * trimmed, every space and forward slash removed, and the result lower-cased.
 *
 * @param {string[]} data - Raw header cells.
 * @returns {string[]} Normalized header names, in the same order.
 */
function normalizeHeaders(data) {
  const normalized = [];
  for (const rawName of data) {
    const trimmed = rawName.trim();
    const compact = trimmed.replace(/[ \/]/g, '');
    normalized.push(compact.toLowerCase());
  }
  return normalized;
}
/**
 * Maps one CSV row onto the schema.
 *
 * Columns whose header has no schema entry are ignored. For entries declared
 * as objects the cell is validated first; a cell that fails validation is
 * reported in `errors` and left out of `record`.
 *
 * @param {Array} attrMap - `[attrName, options]` schema pairs.
 * @param {Array} data - Cell values of the row, positionally aligned with `headers`.
 * @param {string[]} headers - Normalized header names.
 * @returns {{errors: (Array|undefined), record: Object}} `errors` stays
 *   `undefined` when nothing failed; otherwise it is an array of validation
 *   failures that also carries the raw row as `originalData`.
 */
function parseRecord(attrMap, data, headers) {
  const record = {};
  let errors;

  headers.forEach((headerKey, column) => {
    // only map columns that are known
    const match = getAttrByKeyName(attrMap, headerKey);
    if (!match) {
      return;
    }

    const [attrName, options] = match;
    const value = data[column];
    const failure = typeof options === 'object'
      ? validateAttr(options, value)
      : undefined;

    if (failure) {
      if (!errors) {
        errors = [];
      }
      errors.originalData = data;
      errors.push(failure);
      return;
    }

    record[attrName] = value;
  });

  return { errors, record };
}
/**
 * Runs the optional validator attached to a schema entry against one value.
 *
 * @param {Object} attr - Schema options; `attr.validation`, when present, must
 *   expose `validate(value)` returning an object with an `error` member.
 * @param {*} value - Cell value to check.
 * @returns {{attrName: *, message: *}|undefined} A failure descriptor built
 *   from `attr.name` and the validator's error message, or `undefined` when
 *   there is no validator or the value passed.
 */
function validateAttr(attr, value) {
  const { validation } = attr;
  if (!validation) {
    return undefined;
  }

  const { error } = validation.validate(value);
  if (!error) {
    return undefined;
  }

  return { attrName: attr.name, message: error.message };
}
module.exports = CSVStreamParser; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const Readable = require('stream').Readable; | |
const os = require('os'); | |
const Joi = require('joi'); | |
/**
 * Builds an already-ended in-memory Readable that mimics a CSV file.
 *
 * @param {Object} spec
 * @param {string} [spec.headers] - Header line; skipped when falsy.
 * @param {string[]} [spec.rows] - Data lines; skipped when falsy.
 * @returns {Readable} Stream holding each supplied line followed by `os.EOL`.
 */
function mockCSVStream({ headers, rows }) {
  const stream = new Readable();

  // Each line and its terminator are pushed as separate chunks.
  const pushLine = line => {
    stream.push(line);
    stream.push(os.EOL);
  };

  if (headers) {
    pushLine(headers);
  }
  if (rows) {
    rows.forEach(row => pushLine(row));
  }

  // Signal end-of-stream.
  stream.push(null);
  return stream;
}
const CSVStreamParser = require('./csvStreamParser.js'); | |
const HeaderValidationError = require('./headerValidationError'); | |
/**
 * Behavioural spec for CSVStreamParser: header normalization, header
 * validation (required / conditionally required / custom checks) and per-row
 * validation results.
 */
describe('csv stream parser', function () {
  /** Parses the mocked CSV described by `csvSpec` using `schema`. */
  const parseCSV = (schema, csvSpec) =>
    new CSVStreamParser(schema).parse(mockCSVStream(csvSpec));

  /** Resolves with the `record` of the first parsed row. */
  const firstRecord = (schema, csvSpec) =>
    parseCSV(schema, csvSpec).then(results => results[0].record);

  /**
   * Asserts that `promise` rejects. The two-argument `then` keeps the
   * "unexpectedly resolved" failure out of the rejection handler; a trailing
   * `.catch` would swallow it and let the test pass either way.
   */
  const expectRejection = (promise, assertOnError = error => expect(error).toBeDefined()) =>
    promise.then(
      () => {
        throw new Error('Expected parse() to reject, but it resolved');
      },
      assertOnError
    );

  it('should allow headers in lowercase', function () {
    return firstRecord(
      { lastName: 'lastname' },
      { headers: 'lastname,firstname', rows: ['man,super'] }
    ).then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow spaces in the header', function () {
    return firstRecord(
      { lastName: 'lastname' },
      { headers: 'last name,firstname', rows: ['man,super'] }
    ).then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow slashes in the header', function () {
    return firstRecord(
      { lastName: 'lastname' },
      { headers: 'last / name,firstname', rows: ['man,super'] }
    ).then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow extra spaces around headers', function () {
    return firstRecord(
      { lastName: 'lastname' },
      { headers: ' lastName ,firstname', rows: ['man,super'] }
    ).then(record => expect(record.lastName).toBeTruthy());
  });

  it('should ignore extra headers', function () {
    return firstRecord(
      { lastName: 'lastname' },
      { headers: 'lastName,firstname', rows: ['man,super'] }
    ).then(record => expect(record.firstName).toBe(undefined));
  });

  it('should throw an error for empty headers', function () {
    return expectRejection(
      parseCSV(
        { lastName: 'lastname' },
        { headers: 'lastName,,firstname', rows: ['man,super'] }
      )
    );
  });

  it('should throw an error for header that contain only whitespace', function () {
    return expectRejection(
      parseCSV(
        { lastName: 'lastname' },
        { headers: 'lastName, ,firstname', rows: ['man,bob,super'] }
      )
    );
  });

  it('should support objects for schema options', function () {
    return firstRecord(
      { lastName: { name: 'lastname' } },
      { headers: ' lastName ,firstname', rows: ['man,super'] }
    ).then(record => expect(record.lastName).toBeTruthy());
  });

  it('should throw an error for missing required headers', function () {
    const schema = {
      lastName: { name: 'lastname', required: true },
      // Schema names are compared against normalized (lower-cased) headers,
      // so they must be lower-case to ever match.
      firstName: { name: 'firstname', required: true },
      address: 'address'
    };
    return expectRejection(
      parseCSV(schema, { headers: 'address', rows: ['bleaker street'] })
    );
  });

  it('should conditionally require headers', function () {
    const schema = {
      lastName: 'lastname',
      firstName: {
        name: 'firstname',
        required: headers => headers.includes('lastname')
      },
      address: 'address'
    };
    return parseCSV(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,man,bleaker street']
    })
      .then(results => results[0].errors)
      .then(errors => expect(errors).toBeUndefined());
  });

  it('should log an error if a conditionally required header is missing', function () {
    const schema = {
      lastName: 'lastname',
      firstName: {
        name: 'firstname',
        required: headers => headers.includes('lastname')
      },
      address: 'address'
    };
    return expectRejection(
      parseCSV(schema, { headers: 'lastname,address', rows: ['man,bleaker street'] })
    );
  });

  it('should throw an error if exclusively required headers are missing', function () {
    const schema = {
      employeeId: {
        name: 'employeeid',
        required: headers => !headers.includes('tascid')
      },
      tascId: {
        name: 'tascid',
        required: headers => !headers.includes('employeeid')
      },
      address: 'address'
    };
    return expectRejection(
      parseCSV(schema, { headers: 'address', rows: ['bleaker street'] })
    );
  });

  it('should log a custom error if exclusive required headers are missing', function () {
    const expectedErrorMessage = 'TascId or employeeId is required';
    const checkRequiredHeaders = headers => {
      if (!(headers.includes('tascid') || headers.includes('employeeid'))) {
        throw new HeaderValidationError(expectedErrorMessage);
      }
    };
    const schema = {
      employeeId: { name: 'employeeid', required: checkRequiredHeaders },
      tascId: { name: 'tascid', required: checkRequiredHeaders },
      address: 'address'
    };
    return expectRejection(
      parseCSV(schema, { headers: 'address', rows: ['bleaker street'] }),
      error => expect(error.message).toBe(expectedErrorMessage)
    );
  });

  it('should return a record for each line in a file', function () {
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().length(1)
      }
    };
    return parseCSV(schema, {
      headers: 'lastName,firstName',
      rows: ['man,super', 'willis,bob']
    }).then(results => expect(results.length).toBe(2));
  });

  it('should log an error for missing required data', function () {
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().required()
      },
      firstName: 'firstname'
    };
    return parseCSV(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,,bleaker street']
    })
      .then(results => results[0].errors)
      .then(errors => expect(errors.length).toBe(1));
  });

  it('should log a custom error message', function () {
    const expectedErrorMessage = 'Last Name is required';
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().required().error(() => expectedErrorMessage)
      },
      firstName: 'firstname'
    };
    return parseCSV(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,,bleaker street']
    })
      .then(results => results[0].errors)
      .then(([error]) => expect(error.message).toBe(expectedErrorMessage));
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment