Last active
September 15, 2021 12:11
-
-
Save gpDA/d0d400bd80da6e57e6a04d7afbe27844 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ASSUMPTION
// The text files are placed in the same directory as the program file.
// QUESTION
// Are words case-insensitive? The problem statement does not specify this,
// so the code below treats words as case-insensitive (e.g. "so" and "So" are the same word).
// HOW TO RUN THE PROGRAM
// node <file_name> <sampleFile> <commonFile>
// For example: node test.js alice_in_wonderland.txt 1-1000.txt
const fs = require('fs');

// Positional CLI arguments: process.argv[0] is the node binary and
// process.argv[1] is this script, so both leading slots are skipped.
// Either name is undefined when the corresponding argument is omitted.
const [, , sampleFile, commonFile] = process.argv;
/**
 * Reads a text file as UTF-8 and splits its contents on a delimiter.
 *
 * @param {Object} options
 * @param {string} options.fileName - Path of the file to read.
 * @param {string|RegExp} options.delimiter - Separator handed to `String.prototype.split`.
 * @returns {Promise<string[]>} Resolves with the pieces of the file content;
 *   rejects with the underlying file-system error when the file cannot be read.
 */
const readFileAsync = async function ({ fileName, delimiter }) {
  // fs.promises already returns a promise, so no manual Promise wrapper is needed.
  const data = await fs.promises.readFile(fileName, 'utf-8');
  return data.split(delimiter);
};
// Read descriptors handed to readFileAsync: which file to load and how to
// split its content into entries.
const files = [
  {
    fileName: sampleFile,
    // Split on any run of whitespace. Splitting on a single space left words
    // separated only by a newline or tab glued together as one token.
    delimiter: /\s+/,
  },
  {
    fileName: commonFile,
    // One entry per line.
    delimiter: '\n',
  },
];
/**
 * Counts how often each accepted, non-common word occurs in the sample text.
 * Words are compared and reported in lower case, so "So" and "so" share a count.
 *
 * @param {string[]} sampleTokens - Pieces of the sample text; each piece is
 *   split again on whitespace, so pieces that still contain newlines are fine.
 * @param {string[]} commonLines - Common words, one per entry; surrounding
 *   whitespace (including a trailing carriage return) is ignored.
 * @returns {Array<[string, number]>} `[word, count]` pairs sorted by count,
 *   highest first; ties keep first-seen order.
 */
function countUncommonWords(sampleTokens, commonLines) {
  // Accept a single alphanumeric character, or a token that starts and ends
  // with an alphanumeric and has only alphanumerics or . _ % ' ` - in between.
  // The hyphen sits last in the class so it is a literal, not a range.
  // Tokens with leading or trailing punctuation (e.g. "rabbit,") are skipped.
  const wordPattern = /^[a-zA-Z0-9](?:[a-zA-Z0-9._%'`-]*[a-zA-Z0-9])?$/;

  // A Set gives constant-time membership checks inside the counting loop.
  const commonWords = new Set();
  for (const line of commonLines) {
    // trim() removes a trailing "\r" only when one is present, so files with
    // plain "\n" line endings do not lose the last letter of every word.
    const commonWord = line.trim().toLowerCase();
    if (commonWord) {
      commonWords.add(commonWord);
    }
  }

  const freqMap = new Map();
  for (const token of sampleTokens) {
    for (const rawWord of token.split(/\s+/)) {
      const word = rawWord.toLowerCase();
      if (word && wordPattern.test(word) && !commonWords.has(word)) {
        freqMap.set(word, (freqMap.get(word) || 0) + 1);
      }
    }
  }

  // Sort by frequency in descending order.
  return [...freqMap.entries()].sort((a, b) => b[1] - a[1]);
}

// Entry point: load both files in parallel, then print the frequency table.
(async () => {
  try {
    const [sampleFileData, commonFileData] = await Promise.all(
      files.map((file) => readFileAsync(file))
    );

    console.log(`Count Word`);
    console.log(`=== ====`);
    for (const [word, freq] of countUncommonWords(sampleFileData, commonFileData)) {
      console.log(`${freq} ${word}`);
    }
  } catch (err) {
    // Report read failures (missing argument, unreadable file) instead of
    // leaving an unhandled promise rejection.
    console.error(`Unable to build the word-frequency report: ${err.message}`);
    process.exitCode = 1;
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment