Skip to content

Instantly share code, notes, and snippets.

@gpDA
Last active September 15, 2021 12:11
Show Gist options
  • Save gpDA/d0d400bd80da6e57e6a04d7afbe27844 to your computer and use it in GitHub Desktop.
Save gpDA/d0d400bd80da6e57e6a04d7afbe27844 to your computer and use it in GitHub Desktop.
// ASSUMPTION
// text files will be placed in the same directory as the program file
// QUESTION
// Are words case-insensitive? The problem statement does not specify.
// The code below compares words case-insensitively, e.g. "so" and "So" are treated as the same word.
// HOW TO RUN PROGRAM
// node <file_name> <sampleFile> <commonFile>
// for example, node test.js alice_in_wonderland.txt 1-1000.txt
const fs = require('fs');
// process.argv[0] is the node binary and process.argv[1] is this script,
// so the two user-supplied file names start at index 2.
const sampleFile = process.argv[2];
const commonFile = process.argv[3];
/**
 * Reads a file as UTF-8 text and splits its contents into pieces.
 *
 * @param {Object} options
 * @param {string} options.fileName - Path passed to `fs.readFile`.
 * @param {string|RegExp} options.delimiter - Separator passed to `String.prototype.split`.
 * @returns {Promise<string[]>} Resolves with the split pieces; rejects with
 *   the error reported by `fs.readFile`.
 */
const readFileAsync = async function({fileName, delimiter}) {
  // Builds the node-style callback that settles the surrounding promise.
  const settleWith = (resolve, reject) => (err, data) => {
    if (err) {
      reject(err);
      return;
    }
    resolve(data.split(delimiter));
  };

  return new Promise((resolve, reject) => {
    fs.readFile(fileName, 'utf-8', settleWith(resolve, reject));
  });
};
// Input files, each paired with the separator used to split its contents.
const files = [
  {
    fileName: sampleFile,
    // Split on any run of whitespace rather than a single space, so words
    // separated by newlines or tabs are not glued together into one token.
    delimiter: /\s+/,
  },
  {
    // One entry per line.
    fileName: commonFile,
    delimiter: '\n',
  },
];
// A countable word: alphanumeric runs, optionally joined by runs of the
// separators . _ % ' ` and -. The hyphen is placed last in the class so it is
// a literal hyphen instead of forming a character range.
const WORD_PATTERN = /^[a-zA-Z0-9]+(?:[._%'`-]+[a-zA-Z0-9]+)*$/;

/**
 * Builds a lookup set from the lines of the common-word file.
 * Each line is trimmed (removing a trailing "\r" from CRLF files without
 * cutting real letters from LF files) and lower-cased; blank lines are dropped.
 *
 * @param {string[]} commonLines - Raw lines of the common-word file.
 * @returns {Set<string>} Lower-cased common words.
 */
function buildCommonWordSet(commonLines) {
  const commonWords = new Set();
  for (const line of commonLines) {
    const word = line.trim().toLowerCase();
    if (word) {
      commonWords.add(word);
    }
  }
  return commonWords;
}

/**
 * Counts, case-insensitively, the sample words that look like words
 * (see WORD_PATTERN) and are not in the common-word set.
 *
 * @param {string[]} sampleWords - Tokens from the sample file.
 * @param {Set<string>} commonWords - Lower-cased words to exclude.
 * @returns {Map<string, number>} Lower-cased word -> number of occurrences.
 */
function countUncommonWords(sampleWords, commonWords) {
  const freqMap = new Map();
  for (const token of sampleWords) {
    // Lower-case before counting so "So" and "so" share one entry.
    const word = token.toLowerCase();
    if (!WORD_PATTERN.test(word) || commonWords.has(word)) {
      continue;
    }
    freqMap.set(word, (freqMap.get(word) || 0) + 1);
  }
  return freqMap;
}

// Entry point: read both files in parallel, count the uncommon words, and
// print them in descending order of frequency.
(async () => {
  try {
    const [sampleFileData, commonFileData] = await Promise.all(
      files.map((file) => readFileAsync(file)),
    );

    const commonWords = buildCommonWordSet(commonFileData);
    const freqMap = countUncommonWords(sampleFileData, commonWords);

    // Sort by frequency in descending order.
    const sortedEntries = [...freqMap.entries()].sort((a, b) => b[1] - a[1]);

    console.log(`Count Word`);
    console.log(`=== ====`);
    for (const [word, freq] of sortedEntries) {
      console.log(`${freq} ${word}`);
    }
  } catch (err) {
    // Report the failure (e.g. an unreadable file) and signal it through the
    // exit code instead of leaving an unhandled promise rejection.
    console.error(`Failed to count words: ${err.message}`);
    process.exitCode = 1;
  }
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment