-
-
Save premasagar/519405 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
window._gzip = (function() | |
{ | |
var _matchCount = function(s, word) | |
{ | |
var n = 0, wordLen = word.length, p = -wordLen; | |
while((p = s.indexOf(word, p + wordLen)) > -1) | |
++n; | |
return n; | |
}; | |
var _matchCountInDict = function(dict, word) | |
{ | |
var n = 0, i = dict.length; | |
while(i--) | |
{ | |
if(dict[i].word.indexOf(word) > -1) | |
++n; | |
} | |
return n; | |
}; | |
var _escapeForCString = function(s) | |
{ | |
return s.replace(/\\/g, '\\\\'). | |
replace(/\'/g, '\\\''). | |
//replace(/\"/g, '\\\"'). | |
replace(/\r/g, '\\r'). | |
replace(/\0/g, '\\x00'). | |
replace(/\n/g, '\\n'); | |
}; | |
var reEscapeableRE = /[\[\]\(\)\{\}\?\!\:\|\+\.\*\'\"]/g; | |
var _escapeForRegExp = function(s) | |
{ | |
return s.replace(reEscapeableRE, '\\$&'); | |
}; | |
var _escapeForRegExpReplace = function(s) | |
{ | |
return s.replace(/\$/g, '\\$$'); | |
}; | |
var _removeEscapebleChars = function(a) | |
{ | |
delete a['^']; | |
delete a['\"']; | |
delete a['\'']; | |
delete a['\\']; | |
delete a['$']; | |
delete a['[']; | |
delete a[']']; | |
delete a['(']; | |
delete a[')']; | |
delete a['{']; | |
delete a['}']; | |
delete a['?']; | |
delete a['!']; | |
delete a[':']; | |
delete a['|']; | |
delete a['+']; | |
delete a['`']; | |
delete a['.']; | |
delete a['*']; | |
}; | |
var _dumpDict = function(dict) | |
{ | |
var t = ''; | |
var d, i = -1; while((d = dict[++i])) | |
t += d.word + '--'; | |
return t; | |
}; | |
//var unpackCode = 'for(var s=\'__CODE__\',d=\'__DICT__\'.split("`"),i=0;v=d[i++];)s=s.replace(RegExp(v[0],"g"),v.slice(1));console.log(s)'; | |
var unpackCode = 'for(var s=\'__CODE__\',d=\'__DICT__\'.split("`"),i=0;v=d[i++];)s=s.replace(RegExp(v[0],"g"),v.slice(1));eval(s)'; | |
/** | |
@fn pack and wrap to <unpackCode> code <s> using gzip like algo (dictonaty based) | |
@param allowNonAcsiiChars {Boolean} {Default = true} true if you allow use 32-256 chars range else 32-127. | |
@param minProfit {Number} {Default = 0} threshold in bytes for each replace pair | |
@return packed and wrapped to <unpackCode> code | |
*/ | |
var _gzip = function(s, allowNonAcsiiChars, minProfit) | |
{ | |
if(allowNonAcsiiChars == null) | |
allowNonAcsiiChars = true; | |
if(minProfit == null) | |
minProfit = 1; | |
var availableCharMap = {}; | |
var i = 31, e = (allowNonAcsiiChars) ? 256 : 127; | |
// gen all available char map | |
while(++i < e) | |
availableCharMap[String.fromCharCode(i)] = 0; | |
// and remove some chars which require escaping (in RegExp too) | |
_removeEscapebleChars(availableCharMap); | |
// reduce availableCharMap - remove chars which used in <s> | |
i = -1, e = s.length; while(++i < e) | |
{ | |
delete availableCharMap[s.charAt(i)]; | |
} | |
// convert <allowNonAcsiiChars> to <availableChars> | |
var availableChars = [], j = 0; | |
for(var i in availableCharMap) | |
{ | |
if(availableCharMap.hasOwnProperty(i)) | |
availableChars[j++] = i; | |
} | |
//console.log(availableChars.join('> <')); | |
//console.log(availableChars); | |
var sLen = s.length; | |
var dict = [], dictLen = 0; | |
// build <dict> | |
i = -1, e = sLen - 2; while(++i < e) | |
{ | |
var c = s.charAt(i); | |
var j = i + 1; | |
do | |
{ | |
while(++j < e && s.charAt(j) != c) | |
; | |
if(j < e && s.charAt(i + 1) == s.charAt(++j)) | |
{ | |
var k = i + 1; | |
do | |
{ | |
var word = s.slice(i, k + 1); | |
//if(!reEscapeableRE.test(word)) | |
dict[dictLen++] = {word: word, matchCount: _matchCount(s, word)}; | |
} | |
while(++k < sLen && ++j < sLen && s.charAt(k) == s.charAt(j)); | |
} | |
} | |
while(j < e); | |
} | |
// sort <dict> by word | |
dict.sort(function(a, b){ return 2*(a.word < b.word) - 1; }); | |
//console.log(_dumpDict(dict)); | |
// remove duplicating words | |
var d, i = -1, word, j = 0; while((d = dict[++i])) | |
{ | |
if(d.word != word) | |
{ | |
dict[j++] = d; | |
word = d.word; | |
} | |
} | |
dict.length = dictLen = j; | |
// word will be replaced using RegExp - prepare escaped vestion | |
var d, i = -1; while((d = dict[++i])) | |
d.escapedWord = _escapeForRegExp(d.word); | |
// calc profit of each word in <dict> | |
var d, i = -1; while((d = dict[++i])) | |
{ | |
var escapedWord = _escapeForRegExpReplace(d.word); | |
d.profit = d.matchCount*(escapedWord.length - 1) - escapedWord.length - 1 - 1; | |
} | |
// remove words with profit < <minProfit> | |
var d, i = -1, j = -1; while((d = dict[++i])) | |
{ | |
if(d.profit > minProfit) | |
dict[++j] = d; | |
} | |
dict.length = dictLen = j + 1; | |
//console.log(_dumpDict(dict)); | |
// and sort <dict> from last to top by profit and word length | |
dict.sort(function(a, b){ return (b.profit - a.profit) || (b.word.length - a.word.length); }); | |
//console.log(_dumpDict(dict)); | |
// assign replaceChar, replace words to replaceChars and remove unused words | |
var t = s; | |
var d, i = -1, j = 0; while((d = dict[++i])) | |
{ | |
if(t.indexOf(d.word) > -1) | |
{ | |
d.replaceChar = availableChars[j]; | |
t = t.replace(new RegExp(d.escapedWord, 'g'), d.replaceChar); | |
dict[j] = d; | |
j++; | |
} | |
} | |
dict.length = dictLen = j; | |
console.log('t = ', t); | |
// compress <dict> | |
// calc match one word in other words | |
var d, i = -1; while((d = dict[++i])) | |
d.matchCountInDict = _matchCountInDict(dict, d.word); | |
// sort <dict> by matchCountInDict from max to min | |
dict.sort(function(a, b){ return a.matchCountInDict - b.matchCountInDict; }); | |
// redice <dict> | |
var d, i = -1; while((d = dict[++i])) | |
{ | |
var word = d.word; | |
var j = i, d2; while((d2 = dict[++i])) | |
word = word.replace(new RegExp(d2.escapedWord, 'g'), d2.replaceChar); | |
d.word = word; | |
} | |
//console.log(_dumpDict(dict)); | |
console.log(dict); | |
//return; | |
// final | |
// serialize <dict> | |
var dictString = ''; | |
var d, i = -1; while((d = dict[++i])) | |
dictString += d.replaceChar + _escapeForRegExpReplace(d.word) + '`'; | |
dictString = dictString.slice(0, -1); | |
console.log(dictString); | |
// and create unpack code | |
var code = unpackCode. | |
replace('__DICT__', _escapeForCString(dictString)). | |
replace('__CODE__', _escapeForCString(t)) | |
; | |
console.log('code.length = ', code.length); | |
console.log('s.length = ', s.length); | |
console.log('profit = ', (s.length/code.length*100).toFixed(2), '%'); | |
return code; | |
}; | |
return _gzip; | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment