Skip to content

Instantly share code, notes, and snippets.

@iamarkdev
Created January 19, 2017 05:00
Show Gist options
  • Save iamarkdev/fa75be179e6ceab44beded582e31ff33 to your computer and use it in GitHub Desktop.
Save iamarkdev/fa75be179e6ceab44beded582e31ff33 to your computer and use it in GitHub Desktop.
const fs = require('fs');
const os = require('os');
const path = require('path');
const BloomFilter = require('bloomfilter').BloomFilter;
const BitsyDictionary = require('../libs/dictionary');
module.exports = function(commander) {
commander
.command('dictionary <directoryPath> <outputFilePath>')
.description('Generate a bitsy dictionary from files within a directory.')
.action(command);
}
/**
 * Action handler for the `dictionary` command.
 *
 * Validates that `directoryPath` exists and is a directory, collects the
 * regular, non-hidden files that sit directly inside it (no recursion), and
 * passes them to `buildDictionary`. When validation fails, a message is
 * printed to stderr and nothing is built.
 *
 * @param {string} directoryPath - Directory whose files feed the dictionary.
 * @param {string} outputFilePath - Forwarded unchanged to `buildDictionary`.
 * @returns {void}
 */
function command(directoryPath, outputFilePath) {
  // Check if the directoryPath is accessible.
  // Errors go through console.error: streams such as process.stdout have no
  // `error` method, so calling one would throw instead of printing.
  if (!fs.existsSync(directoryPath)) {
    console.error(`Directory path '${directoryPath}' does not exist!`);
    return;
  }

  // Check if the directoryPath is a directory.
  if (!fs.statSync(directoryPath).isDirectory()) {
    console.error(`Directory path '${directoryPath}' is not a directory!`);
    return;
  }

  // Check if the directoryPath contains files. Dot-prefixed entries are
  // rejected before the stat call so hidden items never hit the filesystem.
  const directoryPathFiles = fs.readdirSync(directoryPath)
    .filter((directoryItem) => directoryItem[0] !== '.')
    .map((directoryItem) => path.join(directoryPath, directoryItem))
    .filter((itemPath) => fs.statSync(itemPath).isFile());

  if (directoryPathFiles.length === 0) {
    console.error(`Directory path '${directoryPath}' does not contain any files!`);
    return;
  }

  buildDictionary(directoryPathFiles, outputFilePath);
}
/* Dictionary Generation */
/**
 * Builds a bitsy dictionary from the concatenated contents of the input
 * files and writes it to `dictionary.bitsy` inside `outputFilePath`.
 *
 * The input is scanned in passes with sequence lengths 1, 2, 4, ... 32. In
 * each pass every still-active start index yields one sequence of the
 * current length:
 *   - first time the pass's filter sees the sequence: it is remembered (and,
 *     on the length-1 pass only, added to the dictionary with value 1) and
 *     the index is retired;
 *   - otherwise: the sequence is added to the dictionary with value 2 and the
 *     index stays active for the next, longer pass.
 * Scanning stops when no indexes remain or the next length would reach 64.
 *
 * @param {string[]} inputFilePaths - Files to read, concatenated in order.
 * @param {string} outputFilePath - Directory that receives `dictionary.bitsy`
 *   (the value is joined with the file name, so it is treated as a directory).
 * @returns {void}
 */
function buildDictionary(inputFilePaths, outputFilePath) {
  const lengthMultiplier = 2;
  // Exclusive upper bound on sequence length; checked with >= so the loop
  // still terminates if the multiplier is ever changed.
  const maxSequenceLength = 64;

  // Files are decoded as 'binary' and joined into one string, so sequences
  // may span the boundary between two files.
  const input = inputFilePaths
    .map((inputFilePath) => fs.readFileSync(inputFilePath, { encoding: 'binary' }))
    .join('');

  const dictionary = new BitsyDictionary();
  let remainingIndexes = Array.from({ length: input.length }, (_, index) => index);
  let length = 1;

  while (remainingIndexes.length > 0) {
    console.log(`On length ${length} with ${remainingIndexes.length} indexes remaining`);

    const newRemainingIndexes = [];
    // A fresh filter per pass, sized from the number of active indexes.
    // NOTE(review): arguments are presumably (bit count, hash count) per the
    // bloomfilter package; a probabilistic filter can report false positives,
    // which would count a first occurrence as a repeat. Confirm acceptable.
    const bloomFilter = new BloomFilter(
      8 * remainingIndexes.length * 2,
      12
    );

    for (const index of remainingIndexes) {
      if (index + length > input.length) {
        continue; // on the edge of the input, would not satisfy sequence length at this index
      }

      const sequence = input.slice(index, index + length);

      if (bloomFilter.test(sequence)) {
        newRemainingIndexes.push(index);
        dictionary.addSequence(sequence, 2);
      } else {
        bloomFilter.add(sequence);
        if (length === 1) {
          dictionary.addSequence(sequence, 1);
        }
      }
    }

    remainingIndexes = newRemainingIndexes;
    length *= lengthMultiplier;

    if (length >= maxSequenceLength) {
      break;
    }
  }

  const dictionaryFilePath = path.join(outputFilePath, 'dictionary.bitsy');
  console.log(`Writing to ${dictionaryFilePath}`);
  dictionary.writeToFile(dictionaryFilePath);
  console.log('Bitsy Dictionary Successfully Generated');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment