Last active
August 29, 2015 14:13
-
-
Save ttahmouch/2bcd3b2aecf6e8d337c9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| Code Point (Base 16) Code Unit (Base 2) Code Unit (Base 16) | |
| 0x000000 - 0x00007F 0xxx xxxx 0x00 | |
| 0x000080 - 0x0007FF 110x xxxx 10xx xxxx 0xC0 0x80 | |
| 0x000800 - 0x00FFFF 1110 xxxx 10xx xxxx 10xx xxxx 0xE0 0x80 0x80 | |
| 0x010000 - 0x10FFFF 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx 0xF0 0x80 0x80 0x80 | |
| Encoding a UTF-8 character: | |
| 1. Determine the Code Unit required using the Code Point. | |
| 2. Push in the bits marked x from the bits of the Code Point. | |
| Decoding a UTF-8 character: | |
| 1. Determine the Code Unit size from its high-order bits. | |
| 2. Pull out the bits marked x from the bits of the Code Unit. | |
| **/ | |
/**
 * Holder for the UTF-8 helpers defined below. The function itself keeps no
 * state; helpers are attached to it as static properties.
 */
function Utf8 () {
    return this;
}

/**
 * Encodes Unicode code points as UTF-8 and returns the result as a string in
 * which every character carries one encoded byte (char code 0x00 - 0xFF).
 *
 * Accepts either a single array of code points or the code points as
 * individual arguments:
 *
 *     Utf8.encode(0x48, 0x69);
 *     Utf8.encode([0x48, 0x69]);
 *
 * Entries that are not numbers, are negative or NaN, or exceed 0x10FFFF are
 * skipped silently. Fractional values are rounded down before encoding.
 * Surrogate code points (0xD800 - 0xDFFF) are not rejected; they are encoded
 * with the ordinary three-byte form.
 *
 * @returns {string} The concatenated UTF-8 bytes, one byte per character.
 */
Utf8.encode = function () {
    // Read from a local instead of reassigning `arguments`, which is a
    // SyntaxError in strict mode and in ES modules.
    var points = Array.isArray(arguments[0]) ? arguments[0] : arguments;
    var unit = '';
    var bytes;
    var code;
    var i;

    // Index loop rather than for...in: only real elements are visited, never
    // extra enumerable properties that happen to sit on the array.
    for (i = 0; i < points.length; i += 1) {
        code = points[i];

        // `!(code >= 0)` also rejects NaN.
        if (typeof code !== 'number' || !(code >= 0)) {
            continue;
        }

        // Drop any fractional part up front so the range checks below see the
        // same integer that the bit operations would otherwise truncate to.
        // Without this, 127.5 would be emitted as the overlong pair C1 BF.
        code = Math.floor(code);

        if (code <= 0x7F) {
            bytes = [ code ];
        } else if (code <= 0x0007FF) {
            bytes = [
                0xC0 | (code >>> 6),
                0x80 | (code & 0x3F)
            ];
        } else if (code <= 0x00FFFF) {
            bytes = [
                0xE0 | (code >>> 12),
                0x80 | ((code >>> 6) & 0x3F),
                0x80 | (code & 0x3F)
            ];
        } else if (code <= 0x10FFFF) {
            bytes = [
                0xF0 | (code >>> 18),
                0x80 | ((code >>> 12) & 0x3F),
                0x80 | ((code >>> 6) & 0x3F),
                0x80 | (code & 0x3F)
            ];
        } else {
            // Beyond the Unicode range: contributes nothing to the output.
            bytes = [];
        }

        unit += String.fromCharCode.apply(null, bytes);
    }

    return unit;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment