Last active
May 20, 2024 20:29
-
-
Save nijikokun/5192472 to your computer and use it in GitHub Desktop.
Javascript Base64 UTF8 for the Browser / Server. Base64 UTF-8 Encoding and Decoding Libraries / Modules for AMD, CommonJS, Nodejs and Browsers. Cross-browser compatible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// UTF8 Module
//
// Cleaner and modularized utf-8 encoding and decoding library for javascript.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, [email protected]
/**
 * Module loader: registers `definition` under `name` on whichever module
 * system `context` exposes (CommonJS-style `module.exports`, AMD `define`,
 * or a plain property on `context`).
 */
(function (name, definition, context, dependencies) {
  // `this` is undefined at the top level of strict-mode scripts and ES
  // modules; fall back to the real global object instead of throwing.
  if (context === undefined || context === null) {
    context = typeof globalThis !== 'undefined' ? globalThis
      : typeof self !== 'undefined' ? self
      : typeof window !== 'undefined' ? window
      : typeof global !== 'undefined' ? global
      : {};
  }

  if (typeof context['module'] !== 'undefined' && context['module']['exports']) {
    if (dependencies && context['require']) {
      for (var i = 0; i < dependencies.length; i++) {
        context[dependencies[i]] = context['require'](dependencies[i]);
      }
    }
    context['module']['exports'] = definition.apply(context);
  } else if (typeof context['define'] === 'function' && context['define']['amd']) {
    // The original compared the function itself to the string 'function',
    // which is never true, so the AMD branch could not be reached.
    var amdDefine = context['define'];
    amdDefine(name, (dependencies || []), definition);
  } else {
    context[name] = definition.apply(context);
  }
})('utf8', function () {
  return {
    /**
     * Encodes a JavaScript (UTF-16) string as a "byte string": a string in
     * which every character code is one UTF-8 byte (0-255).
     *
     * CRLF sequences are normalized to LF before encoding. A high surrogate
     * followed by a low surrogate is combined into one code point and
     * written as a 4-byte sequence; an unpaired surrogate is still written
     * as a 3-byte sequence so that decode(encode(s)) round-trips.
     *
     * @param {*} string Value to encode.
     * @returns {*} The byte string, or the input unchanged if it is not a string.
     */
    encode: function (string) {
      if (typeof string !== 'string') return string;
      string = string.replace(/\r\n/g, "\n");

      var output = "", i, charCode, next;
      for (i = 0; i < string.length; i++) {
        charCode = string.charCodeAt(i);

        // Combine a surrogate pair into a single supplementary code point.
        if (charCode >= 0xD800 && charCode <= 0xDBFF && i + 1 < string.length) {
          next = string.charCodeAt(i + 1);
          if (next >= 0xDC00 && next <= 0xDFFF) {
            charCode = 0x10000 + ((charCode - 0xD800) << 10) + (next - 0xDC00);
            i++;
          }
        }

        if (charCode < 0x80) {
          output += String.fromCharCode(charCode);
        } else if (charCode < 0x800) {
          output += String.fromCharCode(
            (charCode >> 6) | 0xC0,
            (charCode & 0x3F) | 0x80
          );
        } else if (charCode < 0x10000) {
          output += String.fromCharCode(
            (charCode >> 12) | 0xE0,
            ((charCode >> 6) & 0x3F) | 0x80,
            (charCode & 0x3F) | 0x80
          );
        } else {
          output += String.fromCharCode(
            (charCode >> 18) | 0xF0,
            ((charCode >> 12) & 0x3F) | 0x80,
            ((charCode >> 6) & 0x3F) | 0x80,
            (charCode & 0x3F) | 0x80
          );
        }
      }
      return output;
    },

    /**
     * Decodes a byte string (one UTF-8 byte per character) back into a
     * JavaScript string. Lead bytes 0xF0-0xF7 are read as 4-byte sequences
     * and emitted as a surrogate pair. The input is not validated:
     * any other non-ASCII, non-2-byte lead is read as a 3-byte sequence,
     * and missing continuation bytes contribute zero bits.
     *
     * @param {*} string Value to decode.
     * @returns {*} The decoded string, or the input unchanged if it is not a string.
     */
    decode: function (string) {
      if (typeof string !== 'string') return string;

      var output = "", i = 0, charCode, codePoint;
      while (i < string.length) {
        charCode = string.charCodeAt(i);
        if (charCode < 128) {
          output += String.fromCharCode(charCode);
          i++;
        } else if ((charCode > 191) && (charCode < 224)) {
          output += String.fromCharCode(
            ((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63)
          );
          i += 2;
        } else if ((charCode > 239) && (charCode < 248)) {
          codePoint = ((charCode & 7) << 18) |
            ((string.charCodeAt(i + 1) & 63) << 12) |
            ((string.charCodeAt(i + 2) & 63) << 6) |
            (string.charCodeAt(i + 3) & 63);
          if (codePoint > 0xFFFF) {
            // Supplementary code points need two UTF-16 code units.
            codePoint -= 0x10000;
            output += String.fromCharCode(
              0xD800 + (codePoint >> 10),
              0xDC00 + (codePoint & 0x3FF)
            );
          } else {
            output += String.fromCharCode(codePoint);
          }
          i += 4;
        } else {
          output += String.fromCharCode(
            ((charCode & 15) << 12) |
            ((string.charCodeAt(i + 1) & 63) << 6) |
            (string.charCodeAt(i + 2) & 63)
          );
          i += 3;
        }
      }
      return output;
    }
  };
}, this);
// Base64 Module
//
// Cleaner, modularized and properly scoped base64 encoding and decoding module for strings.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, [email protected]
/**
 * Module loader: registers `definition` under `name` on whichever module
 * system `context` exposes (CommonJS-style `module.exports`, AMD `define`,
 * or a plain property on `context`).
 */
(function (name, definition, context, dependencies) {
  // `this` is undefined at the top level of strict-mode scripts and ES
  // modules; fall back to the real global object instead of throwing.
  if (context === undefined || context === null) {
    context = typeof globalThis !== 'undefined' ? globalThis
      : typeof self !== 'undefined' ? self
      : typeof window !== 'undefined' ? window
      : typeof global !== 'undefined' ? global
      : {};
  }

  if (typeof context['module'] !== 'undefined' && context['module']['exports']) {
    if (dependencies && context['require']) {
      for (var i = 0; i < dependencies.length; i++) {
        context[dependencies[i]] = context['require'](dependencies[i]);
      }
    }
    context['module']['exports'] = definition.apply(context);
  } else if (typeof context['define'] === 'function' && context['define']['amd']) {
    // The original compared the function itself to the string 'function',
    // which is never true, so the AMD branch could not be reached.
    var amdDefine = context['define'];
    amdDefine(name, (dependencies || []), definition);
  } else {
    // Apply with `context` so `this.utf8` inside the definition resolves
    // against the same object the utf8 module was attached to.
    context[name] = definition.apply(context);
  }
})('base64', function (utf8) {
  var $this = this;
  var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

  // Looks up the UTF-8 codec at call time (injected argument first, then
  // `this.utf8`), so the order in which the two scripts load does not matter.
  function requireUtf8() {
    var codec = utf8 || ($this && $this.utf8);
    if (!codec) {
      var err = new Error("UTF8 Module is missing.");
      err.error = "MissingMethod"; // kept for callers that inspect `.error`
      throw err;
    }
    return codec;
  }

  // Returns the 6-bit value of input[index]; a missing character is
  // treated like '=' padding (64) so unpadded input decodes cleanly.
  function sextetAt(input, index) {
    return index < input.length ? map.indexOf(input.charAt(index)) : 64;
  }

  return {
    /**
     * UTF-8 encodes `input` via the utf8 module, then base64 encodes the
     * resulting byte string.
     *
     * @param {*} input Value to encode.
     * @returns {*} Base64 text, or the input unchanged if it is not a string.
     * @throws {Error} With `error: "MissingMethod"` when no utf8 module is available.
     */
    encode: function (input) {
      var codec = requireUtf8();
      if (typeof input !== 'string') return input;
      input = codec.encode(input);

      var output = "", a, b, c, d, e, f, g, i = 0;
      while (i < input.length) {
        // charCodeAt past the end yields NaN, which marks a short final group.
        a = input.charCodeAt(i++);
        b = input.charCodeAt(i++);
        c = input.charCodeAt(i++);
        d = a >> 2;
        e = ((a & 3) << 4) | (b >> 4);
        f = ((b & 15) << 2) | (c >> 6);
        g = c & 63;
        if (isNaN(b)) {
          f = g = 64; // index 64 in `map` is '='
        } else if (isNaN(c)) {
          g = 64;
        }
        output += map.charAt(d) + map.charAt(e) + map.charAt(f) + map.charAt(g);
      }
      return output;
    },

    /**
     * Base64 decodes `input` (characters outside the base64 alphabet are
     * stripped first), then UTF-8 decodes the bytes via the utf8 module.
     * Missing trailing '=' padding is tolerated.
     *
     * @param {*} input Value to decode.
     * @returns {*} Decoded string, or the input unchanged if it is not a string.
     * @throws {Error} With `error: "MissingMethod"` when no utf8 module is available.
     */
    decode: function (input) {
      var codec = requireUtf8();
      if (typeof input !== 'string') return input;
      input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");

      var output = "", a, b, c, d, e, f, g, i = 0;
      while (i < input.length) {
        d = sextetAt(input, i++);
        e = sextetAt(input, i++);
        f = sextetAt(input, i++);
        g = sextetAt(input, i++);
        // A group with no second sextet holds fewer than 8 bits: no byte to emit.
        if (e === 64) continue;
        a = (d << 2) | (e >> 4);
        b = ((e & 15) << 4) | (f >> 2);
        c = ((f & 3) << 6) | g;
        output += String.fromCharCode(a);
        if (f !== 64) output += String.fromCharCode(b);
        if (g !== 64) output += String.fromCharCode(c);
      }
      return codec.decode(output);
    }
  };
}, this, [ "utf8" ]);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Base64 Module
//
// Cleaner, modularized and properly scoped base64 encoding and decoding module for strings.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, [email protected]
/**
 * Module loader: registers `definition` under `name` on whichever module
 * system `context` exposes (CommonJS-style `module.exports`, AMD `define`,
 * or a plain property on `context`).
 */
(function (name, definition, context, dependencies) {
  // `this` is undefined at the top level of strict-mode scripts and ES
  // modules; fall back to the real global object instead of throwing.
  if (context === undefined || context === null) {
    context = typeof globalThis !== 'undefined' ? globalThis
      : typeof self !== 'undefined' ? self
      : typeof window !== 'undefined' ? window
      : typeof global !== 'undefined' ? global
      : {};
  }

  if (typeof context['module'] !== 'undefined' && context['module']['exports']) {
    if (dependencies && context['require']) {
      for (var i = 0; i < dependencies.length; i++) {
        context[dependencies[i]] = context['require'](dependencies[i]);
      }
    }
    context['module']['exports'] = definition.apply(context);
  } else if (typeof context['define'] === 'function' && context['define']['amd']) {
    // The original compared the function itself to the string 'function',
    // which is never true, so the AMD branch could not be reached.
    var amdDefine = context['define'];
    amdDefine(name, (dependencies || []), definition);
  } else {
    // Apply with `context` so `this.utf8` inside the definition resolves
    // against the same object the utf8 module was attached to.
    context[name] = definition.apply(context);
  }
})('base64', function (utf8) {
  var $this = this;
  var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

  // Looks up the UTF-8 codec at call time (injected argument first, then
  // `this.utf8`), so the order in which the two scripts load does not matter.
  function requireUtf8() {
    var codec = utf8 || ($this && $this.utf8);
    if (!codec) {
      var err = new Error("UTF8 Module is missing.");
      err.error = "MissingMethod"; // kept for callers that inspect `.error`
      throw err;
    }
    return codec;
  }

  // Returns the 6-bit value of input[index]; a missing character is
  // treated like '=' padding (64) so unpadded input decodes cleanly.
  function sextetAt(input, index) {
    return index < input.length ? map.indexOf(input.charAt(index)) : 64;
  }

  return {
    /**
     * UTF-8 encodes `input` via the utf8 module, then base64 encodes the
     * resulting byte string.
     *
     * @param {*} input Value to encode.
     * @returns {*} Base64 text, or the input unchanged if it is not a string.
     * @throws {Error} With `error: "MissingMethod"` when no utf8 module is available.
     */
    encode: function (input) {
      var codec = requireUtf8();
      if (typeof input !== 'string') return input;
      input = codec.encode(input);

      var output = "", a, b, c, d, e, f, g, i = 0;
      while (i < input.length) {
        // charCodeAt past the end yields NaN, which marks a short final group.
        a = input.charCodeAt(i++);
        b = input.charCodeAt(i++);
        c = input.charCodeAt(i++);
        d = a >> 2;
        e = ((a & 3) << 4) | (b >> 4);
        f = ((b & 15) << 2) | (c >> 6);
        g = c & 63;
        if (isNaN(b)) {
          f = g = 64; // index 64 in `map` is '='
        } else if (isNaN(c)) {
          g = 64;
        }
        output += map.charAt(d) + map.charAt(e) + map.charAt(f) + map.charAt(g);
      }
      return output;
    },

    /**
     * Base64 decodes `input` (characters outside the base64 alphabet are
     * stripped first), then UTF-8 decodes the bytes via the utf8 module.
     * Missing trailing '=' padding is tolerated.
     *
     * @param {*} input Value to decode.
     * @returns {*} Decoded string, or the input unchanged if it is not a string.
     * @throws {Error} With `error: "MissingMethod"` when no utf8 module is available.
     */
    decode: function (input) {
      var codec = requireUtf8();
      if (typeof input !== 'string') return input;
      input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");

      var output = "", a, b, c, d, e, f, g, i = 0;
      while (i < input.length) {
        d = sextetAt(input, i++);
        e = sextetAt(input, i++);
        f = sextetAt(input, i++);
        g = sextetAt(input, i++);
        // A group with no second sextet holds fewer than 8 bits: no byte to emit.
        if (e === 64) continue;
        a = (d << 2) | (e >> 4);
        b = ((e & 15) << 4) | (f >> 2);
        c = ((f & 3) << 6) | g;
        output += String.fromCharCode(a);
        if (f !== 64) output += String.fromCharCode(b);
        if (g !== 64) output += String.fromCharCode(c);
      }
      return codec.decode(output);
    }
  };
}, this, [ "utf8" ]);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// UTF8 Module
//
// Cleaner and modularized utf-8 encoding and decoding library for javascript.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, [email protected]
/**
 * Module loader: registers `definition` under `name` on whichever module
 * system `context` exposes (CommonJS-style `module.exports`, AMD `define`,
 * or a plain property on `context`).
 */
(function (name, definition, context, dependencies) {
  // `this` is undefined at the top level of strict-mode scripts and ES
  // modules; fall back to the real global object instead of throwing.
  if (context === undefined || context === null) {
    context = typeof globalThis !== 'undefined' ? globalThis
      : typeof self !== 'undefined' ? self
      : typeof window !== 'undefined' ? window
      : typeof global !== 'undefined' ? global
      : {};
  }

  if (typeof context['module'] !== 'undefined' && context['module']['exports']) {
    if (dependencies && context['require']) {
      for (var i = 0; i < dependencies.length; i++) {
        context[dependencies[i]] = context['require'](dependencies[i]);
      }
    }
    context['module']['exports'] = definition.apply(context);
  } else if (typeof context['define'] === 'function' && context['define']['amd']) {
    // The original compared the function itself to the string 'function',
    // which is never true, so the AMD branch could not be reached.
    var amdDefine = context['define'];
    amdDefine(name, (dependencies || []), definition);
  } else {
    context[name] = definition.apply(context);
  }
})('utf8', function () {
  return {
    /**
     * Encodes a JavaScript (UTF-16) string as a "byte string": a string in
     * which every character code is one UTF-8 byte (0-255).
     *
     * CRLF sequences are normalized to LF before encoding. A high surrogate
     * followed by a low surrogate is combined into one code point and
     * written as a 4-byte sequence; an unpaired surrogate is still written
     * as a 3-byte sequence so that decode(encode(s)) round-trips.
     *
     * @param {*} string Value to encode.
     * @returns {*} The byte string, or the input unchanged if it is not a string.
     */
    encode: function (string) {
      if (typeof string !== 'string') return string;
      string = string.replace(/\r\n/g, "\n");

      var output = "", i, charCode, next;
      for (i = 0; i < string.length; i++) {
        charCode = string.charCodeAt(i);

        // Combine a surrogate pair into a single supplementary code point.
        if (charCode >= 0xD800 && charCode <= 0xDBFF && i + 1 < string.length) {
          next = string.charCodeAt(i + 1);
          if (next >= 0xDC00 && next <= 0xDFFF) {
            charCode = 0x10000 + ((charCode - 0xD800) << 10) + (next - 0xDC00);
            i++;
          }
        }

        if (charCode < 0x80) {
          output += String.fromCharCode(charCode);
        } else if (charCode < 0x800) {
          output += String.fromCharCode(
            (charCode >> 6) | 0xC0,
            (charCode & 0x3F) | 0x80
          );
        } else if (charCode < 0x10000) {
          output += String.fromCharCode(
            (charCode >> 12) | 0xE0,
            ((charCode >> 6) & 0x3F) | 0x80,
            (charCode & 0x3F) | 0x80
          );
        } else {
          output += String.fromCharCode(
            (charCode >> 18) | 0xF0,
            ((charCode >> 12) & 0x3F) | 0x80,
            ((charCode >> 6) & 0x3F) | 0x80,
            (charCode & 0x3F) | 0x80
          );
        }
      }
      return output;
    },

    /**
     * Decodes a byte string (one UTF-8 byte per character) back into a
     * JavaScript string. Lead bytes 0xF0-0xF7 are read as 4-byte sequences
     * and emitted as a surrogate pair. The input is not validated:
     * any other non-ASCII, non-2-byte lead is read as a 3-byte sequence,
     * and missing continuation bytes contribute zero bits.
     *
     * @param {*} string Value to decode.
     * @returns {*} The decoded string, or the input unchanged if it is not a string.
     */
    decode: function (string) {
      if (typeof string !== 'string') return string;

      var output = "", i = 0, charCode, codePoint;
      while (i < string.length) {
        charCode = string.charCodeAt(i);
        if (charCode < 128) {
          output += String.fromCharCode(charCode);
          i++;
        } else if ((charCode > 191) && (charCode < 224)) {
          output += String.fromCharCode(
            ((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63)
          );
          i += 2;
        } else if ((charCode > 239) && (charCode < 248)) {
          codePoint = ((charCode & 7) << 18) |
            ((string.charCodeAt(i + 1) & 63) << 12) |
            ((string.charCodeAt(i + 2) & 63) << 6) |
            (string.charCodeAt(i + 3) & 63);
          if (codePoint > 0xFFFF) {
            // Supplementary code points need two UTF-16 code units.
            codePoint -= 0x10000;
            output += String.fromCharCode(
              0xD800 + (codePoint >> 10),
              0xDC00 + (codePoint & 0x3FF)
            );
          } else {
            output += String.fromCharCode(codePoint);
          }
          i += 4;
        } else {
          output += String.fromCharCode(
            ((charCode & 15) << 12) |
            ((string.charCodeAt(i + 1) & 63) << 6) |
            (string.charCodeAt(i + 2) & 63)
          );
          i += 3;
        }
      }
      return output;
    }
  };
}, this);
See also TiddlyWiki/TiddlyWiki5#4685, which was caused by this code not handling surrogate pairs properly, and was fixed by the code I posted in https://gist.github.com/Nijikokun/5192472#gistcomment-3416186 just above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This fails for emojis like 📚 (U+1F4DA BOOKS), which are represented by a surrogate pair (0xD83D 0xDCDA, in the case of 📚) in JavaScript's UTF-16 encoding. It will encode U+1F4DA as if it were two code points, U+D83D and U+DCDA, which aren't actually valid Unicode code points, and produce the UTF-8 encoding "\xed\xa0\xbd\xed\xb3\x9a", which is invalid UTF-8 and will either throw an error (on strict UTF-8 parsers) or turn into either two or six � characters, depending on the parser. The correct UTF-8 encoding of U+1F4DA is "\xf0\x9f\x93\x9a", which will be encoded and decoded correctly by the code below:
This is still not 100% right, as it will produce a garbled string if the input contains a single low surrogate character on its own followed by more non-surrogate text. In my use case, I don't care whether such an invalid string gets garbled beyond recognition, so I haven't bothered writing the extra code that would be necessary to handle that rare corner case. If you need to handle that case, you probably already know enough about surrogate pairs to be able to correctly implement it yourself.
As this is based on @nijikokun's original code, I hereby license all my contributions in this comment under the same MIT license as @nijikokun's original.