-
-
Save emptymonkey/51dffd423ffe7299d65ed385dfbf2778 to your computer and use it in GitHub Desktop.
Read and parse files using streams in Node.js to build a dictionary object that maps each word to its frequency.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const Parse = require('./parser'); | |
/**
 * Holds the word => frequency dictionary that parseFile/parseDirectory fill in.
 *
 * Each instance starts with its own empty `dictionary` object; every parse
 * call on the same instance accumulates into that one object.
 *
 * @constructor
 */
function CreateDictionary() {
  this.dictionary = {};
}
/**
 * Parses every `.txt` file found directly inside `dirpath` (no recursion) and
 * accumulates the word counts into this instance's dictionary.
 *
 * @param {string} dirpath - Directory whose `.txt` files should be parsed.
 * @returns {Promise<Object>} Resolves with `this.dictionary` once every file
 *   has been parsed. Rejects if the directory cannot be read or if parsing
 *   any single file fails.
 */
CreateDictionary.prototype.parseDirectory = function(dirpath) {
  return new Promise((resolve, reject) => {
    fs.readdir(dirpath, (err, files) => {
      if (err) {
        reject(err);
        return;
      }
      const pendingFiles = files
        .filter((file) => file.endsWith('.txt'))
        .map((file) => this.parseFile(`${dirpath}/${file}`));
      // A `.then` fulfilment callback only ever receives one argument, so a
      // failure must be routed through the rejection handler; otherwise the
      // outer promise would never settle when a file fails to parse.
      Promise.all(pendingFiles).then(() => {
        console.log(`parsing complete.`);
        resolve(this.dictionary);
      }, reject);
    });
  });
};
/**
 * Streams a single file through a word parser and accumulates the word counts
 * into this instance's dictionary.
 *
 * @param {string} filepath - Path of the file to read (decoded as UTF-8).
 * @returns {Promise<Object>} Resolves with `this.dictionary` when the parser
 *   has finished. Rejects if either the file read or the parser fails.
 */
CreateDictionary.prototype.parseFile = function(filepath) {
  return new Promise((resolve, reject) => {
    const read = fs.createReadStream(filepath, {encoding: 'utf-8'});
    const parser = new Parse(this.dictionary);
    console.log(`reading file ${filepath}`);
    // pipe() does not forward errors from the source to the destination, so
    // the read stream needs its own listener; without it a missing or
    // unreadable file raises an unhandled 'error' event instead of rejecting.
    read.on('error', reject);
    read.pipe(parser)
      .on('error', (err) => {
        // Stop reading and release the file handle once parsing has failed.
        read.destroy();
        reject(err);
      })
      .on('finish', () => {
        console.log(`finished parsing file ${filepath}`);
        resolve(this.dictionary);
      });
  });
};
module.exports = CreateDictionary; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const Transform = require('stream').Transform; | |
/**
 * Builds a transform stream that counts words into `dictionary`.
 *
 * Every incoming chunk is converted to a string, every character outside
 * a-z/A-Z is treated as a separator, and each remaining word is lower-cased
 * and counted: an unseen word starts at 1, a known word is incremented.
 * The stream never pushes any output; its only effect is updating `dictionary`.
 *
 * Works both as `Parse(dictionary)` and `new Parse(dictionary)`; in either
 * case the returned value is the Transform instance.
 *
 * @param {Object} dictionary - Object mapping word => frequency; mutated in place.
 * @returns {Transform} The counting stream.
 */
function Parse(dictionary) {
  // Letters at the very end of a chunk may be the first half of a word that
  // continues in the next chunk, so they are held back until more text (or
  // the end of the stream) shows where the word actually ends.
  let pendingWord = '';

  const countWord = (word) => {
    const key = word.toLowerCase();
    // Only own properties count as "already seen"; a plain truthiness check
    // would pick up inherited members such as `constructor`.
    const seen = Object.prototype.hasOwnProperty.call(dictionary, key);
    dictionary[key] = seen ? dictionary[key] + 1 : 1;
  };

  const parser = new Transform({
    transform(data, encoding, done) {
      const pieces = cleanString(pendingWord + data.toString()).split('\n');
      // The last piece is '' when the chunk ended on a separator, otherwise
      // it is a possibly incomplete word.
      pendingWord = pieces.pop();
      pieces
        .filter((piece) => piece !== '')
        .forEach(countWord);
      done();
    },
    flush(done) {
      // End of input: whatever is still held back is a complete word.
      if (pendingWord !== '') {
        countWord(pendingWord);
        pendingWord = '';
      }
      done();
    },
  });
  return parser;
}

/**
 * Replaces every character that is not an ASCII letter with a newline, so the
 * result can be split on '\n' to obtain the words.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text in which only letters and newlines remain.
 */
function cleanString(str) {
  return str.replace(/[^a-zA-Z]/g, '\n');
}
module.exports = Parse; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const CreateDictionary = require('./dictionary.js');

/**
 * Prints how many distinct words a dictionary holds, followed by the
 * dictionary itself as indented JSON.
 *
 * @param {Object} dictionary - word => frequency mapping to print.
 */
function report(dictionary) {
  console.log([
    '',
    `number of dictionary elements : ${Object.keys(dictionary).length}`,
    `dictionary: ${JSON.stringify(dictionary, null, 2)}`,
    '',
  ].join('\n'));
}

/** Prints the message of a failed parse. */
function reportError(err) {
  console.log(err.message);
}

//test parseDirectory
const dict = new CreateDictionary();
dict.parseDirectory('some-folder-path')
  .then(report)
  .catch(reportError);

//test parseFile
// Uses its own instance: both tests run concurrently, and sharing one
// dictionary would mix the counts of the two runs in each report.
const fileDict = new CreateDictionary();
fileDict.parseFile('some-file-path')
  .then(report)
  .catch(reportError);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment