Skip to content

Instantly share code, notes, and snippets.

@RandomEtc
Created February 20, 2012 20:36
Show Gist options
  • Save RandomEtc/1871251 to your computer and use it in GitHub Desktop.
Save RandomEtc/1871251 to your computer and use it in GitHub Desktop.
Fussing with UTF-16 surrogate encoding in JavaScript
// see http://en.wikipedia.org/wiki/UTF-16#Code_points_U.2B0000_to_U.2BD7FF_and_U.2BE000_to_U.2BFFFF
/**
 * Formats a number as a `\uXXXX` escape sequence.
 *
 * The value is rendered in lowercase hexadecimal, left-padded with zeros, and
 * only the last four characters are kept. No range check is performed, so a
 * value that needs more than four hex digits loses its leading digits.
 *
 * @param {number} n - Value to format (intended to be a 16-bit code unit).
 * @returns {string} A backslash, the letter `u`, and four characters.
 */
function escapeBMP(n) {
var padded = '0000' + n.toString(16);
var lastFour = padded.substring(padded.length - 4);
return '\\u' + lastFour;
}
// see http://en.wikipedia.org/wiki/UTF-16#Code_points_U.2B10000_to_U.2B10FFFF
/**
 * Splits a number into a UTF-16 lead/trail surrogate pair.
 *
 * 0x10000 is subtracted from the input; the bits above the low ten are added
 * to 0xD800 to form the lead surrogate and the low ten bits are added to
 * 0xDC00 to form the trail surrogate. The input is not range-checked here.
 *
 * @param {number} n - Code point to split (intended range 0x10000-0x10FFFF).
 * @returns {number[]} Two-element array `[leadSurrogate, trailSurrogate]`.
 */
function makeSurrogate(n) {
var offset = n - 0x10000;
return [
0xD800 + (offset >> 10),
0xDC00 + (offset & 0x3FF)
];
}
/**
 * Builds the escape text for a code point that is encoded as a surrogate pair.
 *
 * The code point is split with `makeSurrogate`, each resulting code unit is
 * formatted with `escapeBMP`, and the pieces are concatenated in order.
 *
 * @param {number} n - Code point to escape.
 * @returns {string} The concatenated escapes of the code units.
 */
function escapeSurrogate(n) {
var units = makeSurrogate(n);
var escapedUnits = units.map(function (unit) {
return escapeBMP(unit);
});
return escapedUnits.join('');
}
/**
 * Escapes a Unicode code point as `\uXXXX` text using UTF-16 code units.
 *
 * Code points in 0x10000-0x10FFFF are delegated to `escapeSurrogate`; code
 * points in 0x0000-0xFFFF are delegated to `escapeBMP`. Values in the
 * surrogate range 0xD800-0xDFFF are passed through as a single escape.
 *
 * @param {number} n - Integer code point in the range 0x0000-0x10FFFF.
 * @returns {string} The escape text produced by the delegated helper.
 * @throws {RangeError} If `n` is not an integer in 0x0000-0x10FFFF.
 */
function escapeUTF16(n) {
// Anything outside the code point range would otherwise fall through to the
// BMP branch and be silently truncated to four hex digits.
if (typeof n !== 'number' || n % 1 !== 0 || n < 0 || n > 0x10FFFF) {
throw new RangeError('Invalid code point: ' + String(n));
}
if (n >= 0x10000) {
// code point is in a higher plane and must be encoded as a surrogate pair
return escapeSurrogate(n);
}
// code point is in the Basic Multilingual Plane (0x0000-0xFFFF)
return escapeBMP(n);
}
// Self-check: escape U+1F339, parse the escape text back as a JSON string, and
// compare the result with the expected surrogate pair.
var roseEscaped = escapeUTF16(0x1F339);
// NOTE(review): `assert` is not defined in the visible code — presumably
// supplied by the surrounding environment; confirm before running.
assert(JSON.parse('"' + roseEscaped + '"') === "\ud83c\udf39");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment