Created
January 6, 2015 11:59
-
-
Save ludo6577/af4c39dcb4f88b2a39e2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Simple way:
// encodeURIComponent percent-encodes the UTF-8 bytes of the string, and
// unescape turns every %XX escape back into a single character whose code
// is that byte value. The result is therefore a string holding one
// character (code 0-255) per UTF-8 byte, not an array of numbers.
// Caveats: encodeURIComponent throws a URIError when the input contains a
// lone surrogate, and unescape is a legacy function kept for compatibility.
// NOTE(review): utf16_str is not defined in this snippet; it is presumably
// the input string supplied by the surrounding code.
var utf8 = unescape(encodeURIComponent(utf16_str));
//Or (http://stackoverflow.com/questions/18729405/how-to-convert-utf8-string-to-byte-array):
/**
 * Encode a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * Well-formed surrogate pairs are combined into one code point and encoded
 * as four bytes. An unpaired surrogate (a high surrogate not followed by a
 * low one, or a low surrogate on its own) cannot be represented in UTF-8,
 * so it is encoded as U+FFFD REPLACEMENT CHARACTER (0xEF 0xBF 0xBD).
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes, each in the range 0-255, in order.
 */
function toUTF8Array(str) {
    var utf8 = [];
    for (var i = 0; i < str.length; i++) {
        var charcode = str.charCodeAt(i);
        if (charcode < 0x80) {
            // 1 byte: 0xxxxxxx
            utf8.push(charcode);
        } else if (charcode < 0x800) {
            // 2 bytes: 110xxxxx 10xxxxxx
            utf8.push(0xc0 | (charcode >> 6),
                      0x80 | (charcode & 0x3f));
        } else if (charcode < 0xd800 || charcode >= 0xe000) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            utf8.push(0xe0 | (charcode >> 12),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
        } else {
            // Surrogate code unit (0xD800-0xDFFF). Only a high surrogate
            // (0xD800-0xDBFF) immediately followed by a low surrogate
            // (0xDC00-0xDFFF) forms a valid pair.
            var next = (i + 1 < str.length) ? str.charCodeAt(i + 1) : 0;
            if (charcode < 0xdc00 && next >= 0xdc00 && next < 0xe000) {
                // Consume the low surrogate as part of this character.
                i++;
                // UTF-16 encodes 0x10000-0x10FFFF by
                // subtracting 0x10000 and splitting the
                // 20 bits of 0x0-0xFFFFF into two halves
                charcode = 0x10000 + (((charcode & 0x3ff) << 10)
                                      | (next & 0x3ff));
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                utf8.push(0xf0 | (charcode >> 18),
                          0x80 | ((charcode >> 12) & 0x3f),
                          0x80 | ((charcode >> 6) & 0x3f),
                          0x80 | (charcode & 0x3f));
            } else {
                // Unpaired surrogate: emit U+FFFD and do not consume the
                // following code unit, so it is still encoded on its own.
                utf8.push(0xef, 0xbf, 0xbd);
            }
        }
    }
    return utf8;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment