Last active
February 28, 2018 23:06
-
-
Save carbonrobot/3a0e2751a7ddccfc7a31881249d3e61d to your computer and use it in GitHub Desktop.
CSVFileStreamParser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demo script: streams a sample CSV file through CSVStreamParser, prints the
// parsed records, and reports how long the run took.
const marky = require('marky');
const fs = require('fs');
const Joi = require('joi');
const CSVStreamParser = require('./csvStreamParser.js');

const FILENAME = './files/single.csv';
const input = fs.createReadStream(FILENAME);

// Keys are CSV header names in lower case (the parser lower-cases the header
// row before looking columns up). A value is either the output attribute name,
// or an object carrying that name plus a schema used to validate the cell.
const CENSUS_ATTR_MAP = {
  lastname: 'lastName',
  firstname: {
    attrName: 'firstName',
    // The message names both constraints so that a length failure is not
    // reported as a type failure.
    validation: Joi.string()
      .max(4)
      .error(() => 'First Name must be a string of at most 4 characters'),
  },
};

const parser = new CSVStreamParser(CENSUS_ATTR_MAP);

marky.mark('timing');
parser
  .parse(input)
  .then((records) => {
    const ttl = marky.stop('timing').duration;
    console.log(records);
    console.log(`${records.length} records processed in ${ttl}ms`);
  })
  // Failures belong on stderr so they are not mixed into the record output.
  .catch((error) => console.error(error));
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const csv = require('fast-csv'); | |
/**
 * Turns a CSV stream into an array of `{ errors, record }` entries, renaming
 * columns according to an attribute map.
 *
 * The attribute map is keyed by lower-cased CSV header name. Each value is
 * either a string (the output attribute name) or an object
 * `{ attrName, validation }`. `validation` is optional; when present it must
 * expose `validate(value)` returning an object whose `error` property is set
 * (with a `message`) on failure.
 */
class CSVStreamParser {
  /**
   * @param {Object} attrMap - header-name to attribute mapping (see class doc).
   */
  constructor(attrMap) {
    this.attrMap = attrMap;
  }

  /**
   * Pipes `input` through the CSV parser and collects one entry per data row.
   * The first row emitted is treated as the header row.
   *
   * @param {Object} input - readable stream of CSV text; it must support
   *   `pipe()` and `on('error', ...)`.
   * @returns {Promise<Array<{errors: (Object|undefined), record: Object}>>}
   *   resolves when the parser emits `end`; rejects on a parser error or on an
   *   error emitted by `input` itself.
   */
  parse(input) {
    return new Promise((resolve, reject) => {
      const records = [];
      let headers;

      const parser = csv()
        .on('data', (data) => {
          if (!headers) {
            headers = this.parseHeaders(data);
            return;
          }
          records.push(this.parseRecord(data, headers));
        })
        .on('error', reject)
        .on('end', () => resolve(records));

      // pipe() does not forward errors from the source stream, so without this
      // listener a read failure would leave the promise pending forever.
      input.on('error', reject);
      input.pipe(parser);
    });
  }

  /**
   * Normalizes the header row so lookups in the attribute map are
   * case-insensitive.
   *
   * @param {string[]} data - raw header cells.
   * @returns {string[]} the header names in lower case.
   */
  parseHeaders(data) {
    return data.map((name) => name.toLowerCase());
  }

  /**
   * Maps one data row onto a record, keeping only columns present in the
   * attribute map, and validates columns that declare a schema.
   *
   * @param {Array} data - cell values, positionally aligned with `headers`.
   * @param {string[]} headers - lower-cased header names.
   * @returns {{errors: (Object|undefined), record: Object}} `errors` is
   *   `undefined` when every validated column passed; otherwise it is the
   *   `{ attrName, error }` of the last column that failed.
   */
  parseRecord(data, headers) {
    const record = {};
    let errors;

    for (let i = 0; i < headers.length; i++) {
      const header = headers[i];

      // Own-property check: a header such as "constructor" or "toString" must
      // not resolve to a member inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(this.attrMap, header)) {
        continue;
      }

      const attr = this.attrMap[header];
      // only map columns that are known
      if (!attr) {
        continue;
      }

      const value = data[i];
      if (typeof attr === 'object') {
        record[attr.attrName] = value;
        const failure = this.validateAttr(attr, value);
        // Overwrite only on an actual failure, so a later passing column
        // cannot erase an earlier column's error.
        if (failure) {
          errors = failure;
        }
      } else {
        record[attr] = value;
      }
    }

    return { errors, record };
  }

  /**
   * Runs the attribute's schema, if it has one, against a cell value.
   *
   * @param {Object} attr - attribute descriptor `{ attrName, validation }`.
   * @param {*} value - the cell value to validate.
   * @returns {{attrName: string, error: string}|undefined} the failure, or
   *   `undefined` when there is no schema or the value is valid.
   */
  validateAttr(attr, value) {
    if (!attr.validation) {
      return undefined;
    }

    const validationResult = attr.validation.validate(value);
    if (validationResult.error) {
      return { attrName: attr.attrName, error: validationResult.error.message };
    }
    return undefined;
  }
}
// Expose the parser class as this module's sole export.
module.exports = CSVStreamParser;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment