Created
October 15, 2013 12:32
-
-
Save fliptopbox/6990878 to your computer and use it in GitHub Desktop.
JavaScript String compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
@fliptopbox | |
LZW Compression/Decompression for Strings | |
Implementation of LZW algorithms from: | |
http://rosettacode.org/wiki/LZW_compression#JavaScript | |
Usage: | |
var a = 'a very very long string to be squashed'; | |
var b = a.compress(); // 'a veryāăąlong striċ to bečquashed' | |
var c = b.decompress(); // 'a very very long string to be squashed' | |
console.log(a === c); // true | |
var d = a.compress(true); // return as Array | |
console.log(d); // [97, 32, 118 .... 101, 100] an Array of LZW codes (0-255 are character codes, 256+ are dictionary entries) | |
*/ | |
/**
 * LZW-compress this string.
 *
 * The dictionary is seeded with the 256 single characters whose char codes
 * are 0-255; every new phrase gets the next free code starting at 256.
 *
 * Limitations:
 *  - Input characters with a char code above 255 are not in the seed
 *    dictionary, so they are not encoded correctly.
 *  - In string mode each code is written with String.fromCharCode, so the
 *    result is a string of arbitrary UTF-16 code units (not ASCII), and
 *    codes only survive while they fit in 16 bits. Use compress(true) to
 *    get the raw numeric codes instead.
 *
 * @param {boolean} [asArray] Pass exactly `true` to get an Array of numeric
 *     codes; any other value returns the codes packed into a string.
 * @returns {string|number[]} The compressed data.
 */
String.prototype.compress = function (asArray) {
    "use strict";
    var returnCodes = (asArray === true),
        // Prototype-less object: phrases such as "hasOwnProperty" or
        // "__proto__" taken from the input must behave like ordinary keys
        // and must never collide with anything inherited from Object.
        dictionary = Object.create(null),
        uncompressed = String(this),
        i,
        c,
        wc,
        w = "",
        result = [],
        packed = '',
        dictSize = 256;

    // Seed the dictionary with every single-character phrase.
    for (i = 0; i < 256; i += 1) {
        dictionary[String.fromCharCode(i)] = i;
    }

    for (i = 0; i < uncompressed.length; i += 1) {
        c = uncompressed.charAt(i);
        wc = w + c;
        // Codes are numbers (0 included), so compare against undefined
        // rather than relying on truthiness.
        if (dictionary[wc] !== undefined) {
            // Known phrase: keep extending it.
            w = wc;
        } else {
            // Emit the longest known phrase, then learn the extended one.
            result.push(dictionary[w]);
            packed += String.fromCharCode(dictionary[w]);
            dictionary[wc] = dictSize++;
            w = String(c);
        }
    }

    // Flush the phrase that was still being matched when the input ended.
    if (w !== "") {
        result.push(dictionary[w]);
        packed += String.fromCharCode(dictionary[w]);
    }

    return returnCodes ? result : packed;
};
/**
 * Reverse String.prototype.compress: rebuild the original text from a
 * string whose UTF-16 code units are LZW codes.
 *
 * The method can also be invoked on an Array of numeric codes, e.g.
 * String.prototype.decompress.call(codes), in which case the array is
 * consumed as-is.
 *
 * @returns {?string} The decompressed text, "" for empty input, or null
 *     when the code stream is invalid (a code that is neither known nor
 *     the next code about to be defined).
 */
String.prototype.decompress = function () {
    "use strict";
    var i,
        text,
        compressed,
        dictionary = [],
        w,
        result,
        k,
        entry,
        dictSize = 256;

    if (Array.isArray(this)) {
        compressed = this;
    } else {
        // String(this) also unwraps boxed String objects, which a
        // `typeof this === 'string'` check would miss.
        text = String(this);
        compressed = [];
        for (i = 0; i < text.length; i += 1) {
            compressed.push(text.charCodeAt(i));
        }
    }

    // Nothing to decode; without this guard the first-code lookup below
    // would run on undefined.
    if (compressed.length === 0) {
        return "";
    }

    // Seed the dictionary with every single-character phrase.
    for (i = 0; i < 256; i += 1) {
        dictionary[i] = String.fromCharCode(i);
    }

    // A valid stream always starts with a single-character code.
    w = dictionary[compressed[0]];
    if (typeof w !== 'string') {
        return null;
    }
    result = w;

    for (i = 1; i < compressed.length; i += 1) {
        k = compressed[i];
        entry = dictionary[k];
        if (typeof entry !== 'string') {
            if (k === dictSize) {
                // The encoder used the phrase it had only just defined:
                // it must be the previous phrase plus its own first char.
                entry = w + w.charAt(0);
            } else {
                return null;
            }
        }
        result += entry;
        // Mirror the encoder: learn previous phrase + first char of entry.
        dictionary[dictSize++] = w + entry.charAt(0);
        w = entry;
    }
    return result;
};
It looks to me that this implementation mixed up ASCII and Unicode characters. The line
ASCII += String.fromCharCode(dictionary[w]);
will actually produce no ASCII, but Unicode characters. This may be a way to easily represent the codes 256+ in JavaScript, but I argue that this is not the LZW compression as other implementations will interpret it?!
For testing: https://jsfiddle.net/aers9b6g/
@BurninLeo hey, yes, you are absolutely right. It is a Unicode string. Your argument is 100% correct.
this gist was written a very long time ago. i had actually forgotten about it.
You can return the array of numeric codes like this: someString.compress(true).
The true
switch bypasses the Unicode conversion.
thanks for the feedback :D i hope people read this comment
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Fantastic piece of code. Short, sweet, fast and very easy to use.