Created
January 19, 2017 05:01
-
-
Save iamarkdev/92d73ca0062684da034ac19cae3b7b25 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
//const CircularJSON = require('circular-json'); | |
var Heap = require('heap'); | |
module.exports = BitsyDictionary; | |
function BitsyDictionary() { | |
this._dictionary = { // trie of symbols, with root to a given node representing a possible symbol. | |
symbolPointers: [], | |
huffmanTree: {} // Huffman tree for generating the binary code. | |
}; | |
} | |
BitsyDictionary.prototype.traverseSequence = function(sequence) { | |
var lastSymbol = this._dictionary; | |
for (var i = 0; i < sequence.length && lastSymbol; i++) { | |
var char = sequence[i].charCodeAt(0); | |
var symbol = lastSymbol[char]; | |
lastSymbol = symbol; | |
} | |
return lastSymbol; | |
} | |
BitsyDictionary.prototype.addSequence = function(sequence,initcount=2) { | |
var lastSymbol = this._dictionary; | |
for (var i = 0; i < sequence.length; i++) { | |
var char = sequence[i].charCodeAt(0); | |
var symbol = lastSymbol[char]; | |
if (!symbol) { | |
symbol = lastSymbol[char] = {}; | |
symbol.parent=lastSymbol; | |
symbol.char=sequence[i]; | |
} | |
lastSymbol = symbol; | |
} | |
if (lastSymbol.count) { | |
lastSymbol.count++; | |
} else { | |
//todo: work overlaps | |
lastSymbol.count = initcount; //the first time we enter in the dictionary we have seen it twice. | |
lastSymbol.length = sequence.length; | |
this._dictionary.symbolPointers.push(lastSymbol); | |
} | |
return lastSymbol; | |
} | |
BitsyDictionary.prototype._genLiteral = function(node){ | |
var ret=''; | |
if (node.parent && node.parent.parent) ret=this._genLiteral(node.parent); | |
ret+=node.char; | |
return ret; | |
} | |
BitsyDictionary.prototype._removeCircularRefs = function(node) { | |
delete node.parent; | |
for (var i=0; i<255; i++) if (node[i]) this._removeCircularRefs(node[i]); | |
} | |
var totalLenCount=0; | |
// Assign codes based on symbol probability relative to it's count. | |
BitsyDictionary.prototype._assignCodesRecursive = function(node,code,len) { | |
node.huffmanCode=code; | |
node.huffmanLen=len; | |
if (node.left){ | |
this._assignCodesRecursive(node.left,code*2+0,len+1); | |
this._assignCodesRecursive(node.right,code*2+1,len+1); | |
} | |
else if (node.char) { //this means it is not an "internal node" | |
if (node.length!=null && node.count!=null) totalLenCount+=node.length*node.count; | |
else console.log('members undefined at '+('0000000000000000000000' + (code >>> 0).toString(2)).substr(-len) + ': [' + this._genLiteral(node) + '] count=' + node.count); | |
} | |
else{ | |
console.log('node has no children yet the char property is not set. this shouldnt happen'); | |
} | |
} | |
BitsyDictionary.prototype._assignCodes = function() { | |
var nextCode = 0; | |
//huffman implementation | |
var heap = new Heap(function(a, b) { | |
return -(b.count*b.length - a.count*a.length); | |
}); | |
for (var i = 0; i < this._dictionary.symbolPointers.length; i++) | |
heap.push(this._dictionary.symbolPointers[i]); | |
while(heap.size()>1){ | |
//gather the least "frequent" symbols | |
var a=heap.pop(); | |
var b=heap.pop(); | |
//create a new node, add these two as children. | |
var c={}; | |
c.left=a; //a.huffmanParent=c; | |
c.right=b; //b.huffmanParent=c; | |
c.length=1; | |
c.count=a.count*a.length+b.count*b.length; | |
heap.push(c); | |
} | |
if (heap.size()!=1) console.log("expected exactly one element in the heap"); | |
this._dictionary.huffmanTree.root=heap.pop(); | |
this._assignCodesRecursive(this._dictionary.huffmanTree.root,0,0); | |
/* | |
this._dictionary.symbolPointers.sort(function(a, b) { | |
return b.count*b.length - a.count*a.length; | |
}); | |
for (var i = 0; i < this._dictionary.symbolPointers.length; i++) { | |
this._dictionary.symbolPointers[i].code = nextCode; | |
delete this._dictionary.symbolPointers[i].count; | |
delete this._dictionary.symbolPointers[i].length; | |
nextCode++; | |
} | |
*/ | |
delete this._dictionary.symbolPointers; | |
} | |
BitsyDictionary.prototype.writeToFile = function(filePath) { | |
console.log('assigning code'); | |
totalLenCount=0; | |
this._assignCodes(); | |
console.log('done. sum(len*count)='+totalLenCount); | |
console.log('saving to file'); | |
// fs.writeFileSync(filePath, CircularJSON.stringify(this._dictionary.huffmanTree)); | |
this._removeCircularRefs(this._dictionary); | |
fs.writeFileSync(filePath, JSON.stringify(this._dictionary)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment