-
-
Save bytespider/1007834 to your computer and use it in GitHub Desktop.
140byt.es -- convert string to array of UTF-8 bytes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://en.wikipedia.org/wiki/UTF-8 | |
/**
 * Convert a string to an array of UTF-8 byte values.
 *
 * Only `a` is meant to be supplied by callers. `b`..`i` are scratch locals
 * declared as parameters (the golfing convention this entry was written in);
 * every one that is used is assigned before it is read, so any values passed
 * for them are ignored. `e` and `i` are kept only so the signature is unchanged.
 *
 * Behaviour corrected relative to the previous revision:
 *  - the lead byte of 3- and 4-byte sequences now carries the right prefix
 *    (224 / 240; it used to be 160 / 144),
 *  - an empty string yields [] instead of [0],
 *  - a NUL character no longer ends the conversion early,
 *  - surrogate pairs are combined into one code point (4-byte sequence);
 *    a surrogate without a partner is encoded as U+FFFD.
 *
 * @param {string} a - Text to encode.
 * @returns {number[]} One integer in 0..255 per UTF-8 byte, in order.
 */
function stringToByteArray(a,b,c,d,e,f,g,h,i){
  g = 128;                                // 0b10000000: marker bit of every non-ASCII byte
  for (
    b = [],                               // output bytes
    d = 0                                 // read position in `a` (UTF-16 code units)
    ;
    d < a.length                          // test the index, not the code, so "\0" is encoded too
    ;
  ) {
    c = a.charCodeAt(d++);                // current code unit
    if (c >= 0xD800 && c < 0xE000) {      // surrogate: needs a partner to form a code point
      h = a.charCodeAt(d);                // NaN past the end, which fails the range test below
      if (c < 0xDC00 && h >= 0xDC00 && h < 0xE000) {
        c = 0x10000 + ((c - 0xD800) << 10) + (h - 0xDC00); // combine high + low surrogate
        d++;                              // the low surrogate has been consumed
      } else {
        c = 0xFFFD;                       // unpaired surrogate -> replacement character
      }
    }
    f = c < g     ? 0                     // ASCII: single byte, no continuation bytes
      : c < 2048  ? 1                     // 2-byte sequence
      : c < 65536 ? 2                     // 3-byte sequence
      : 3;                                // 4-byte sequence (code point is at most 0x10FFFF here)
    // Lead byte: f+1 high bits set (256 - (128 >> f) gives 192, 224, 240)
    // plus the code point's top bits; plain ASCII is stored unchanged.
    b.push(f ? 256 - (g >> f) + (c >> f * 6) : c);
    while (f--) {                         // remaining 6-bit groups, most significant first
      b.push(g + (c >> f * 6 & 63));      // 10xxxxxx continuation byte
    }
  }
  return b;                               // the byte array
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function(a,b,c,d,e,f,g,h,i){for(b=[e=d=0];c=a.charCodeAt(d++);){g=128;c<g?b[e]=c:c<2048?f=1:c<65536?f=2:c<2<<20?f=3:0;for(h=e++,i=f;f-- >0;b[h]=g+(2<<6-i)+(c>>i*6))b[e++]=g+(c>>f*6&63)}return b} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE | |
Version 2, December 2004 | |
Copyright (C) 2011 YOUR_NAME_HERE <YOUR_URL_HERE> | |
Everyone is permitted to copy and distribute verbatim or modified | |
copies of this license document, and changing it is allowed as long | |
as the name is changed. | |
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE | |
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | |
0. You just DO WHAT THE FUCK YOU WANT TO. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "stringToByteArray", | |
"description": "Convert a string of characters to an array of UTF-8 bytes", | |
"keywords": [ | |
"cryptography", | |
"utf8" | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convert a string to an array of UTF-8 byte values.
 *
 * The string is read as UTF-16: a high surrogate followed by a low surrogate
 * is combined into a single code point and emitted as a 4-byte sequence.
 * A surrogate without a partner is emitted as U+FFFD (bytes 239, 191, 189),
 * so the result never contains an encoded surrogate.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} One integer in 0..255 per UTF-8 byte, in order.
 */
function stringToByteArray(str) {
    var bytes = [], code, next, i;
    for (i = 0; i < str.length; i++) {
        code = str.charCodeAt(i);
        if (code >= 0xD800 && code <= 0xDFFF) {
            // charCodeAt yields UTF-16 code units; rebuild the real code point.
            next = str.charCodeAt(i + 1); // NaN at end of string, fails the test below
            if (code <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF) {
                code = 0x10000 + ((code - 0xD800) << 10) + (next - 0xDC00);
                i++; // the low surrogate has been consumed
            } else {
                code = 0xFFFD; // unpaired surrogate -> replacement character
            }
        }
        if (code < 128) {
            // 1 byte: 0xxxxxxx
            bytes.push(code);
        } else if (code < 2048) {
            // 2 bytes: 110xxxxx 10xxxxxx
            bytes.push(192 + (code >> 6), 128 + (code & 63));
        } else if (code < 65536) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            bytes.push(224 + (code >> 12), 128 + ((code >> 6) & 63), 128 + (code & 63));
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            // (a combined surrogate pair is at most 0x10FFFF)
            bytes.push(240 + (code >> 18), 128 + ((code >> 12) & 63), 128 + ((code >> 6) & 63), 128 + (code & 63));
        }
    }
    return bytes;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<title>stringToByteArray</title> | |
<div>Expected value: <b id="ret"></b></div> | |
<div>Actual value: <b id="ret2"></b></div> | |
<script> | |
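// Demo and self-test: render the byte array produced by the readable reference
// implementation ("Expected value") next to the one produced by the golfed
// implementation ("Actual value") for the same input string.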
/**
 * Readable reference implementation: convert a string to UTF-8 byte values.
 *
 * High/low surrogate pairs are combined into one code point before encoding,
 * so characters outside the Basic Multilingual Plane produce a 4-byte
 * sequence. A surrogate without a partner is encoded as U+FFFD.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} One integer in 0..255 per UTF-8 byte, in order.
 */
function stringToByteArrayOld(str) {
    var bytes = [], code, next, i;
    for (i = 0; i < str.length; i++) {
        code = str.charCodeAt(i);
        if (code >= 0xD800 && code <= 0xDFFF) {
            next = str.charCodeAt(i + 1); // NaN at end of string, fails the test below
            if (code <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF) {
                code = 0x10000 + ((code - 0xD800) << 10) + (next - 0xDC00);
                i++; // skip the low surrogate that was just merged
            } else {
                code = 0xFFFD; // unpaired surrogate -> replacement character
            }
        }
        if (code < 128) {
            bytes.push(code);
        } else if (code < 2048) {
            bytes.push(192 + (code >> 6), 128 + (code & 63));
        } else if (code < 65536) {
            bytes.push(224 + (code >> 12), 128 + ((code >> 6) & 63), 128 + (code & 63));
        } else {
            // Only reachable for a combined surrogate pair (at most 0x10FFFF).
            bytes.push(240 + (code >> 18), 128 + ((code >> 12) & 63), 128 + ((code >> 6) & 63), 128 + (code & 63));
        }
    }
    return bytes;
}
// Compact string -> UTF-8 byte array converter under test.
// `a` is the input string; `b`..`i` are scratch locals declared as parameters
// and are assigned before use (`e` and `i` are unused, kept for the signature).
// Returns an array with one integer (0..255) per UTF-8 byte.
// Fixed here: 3/4-byte lead bytes now use the 224 / 240 prefix, an empty
// string gives [], NUL does not stop the loop, and surrogate pairs are merged
// into one 4-byte sequence (an unpaired surrogate is encoded as U+FFFD).
function stringToByteArray(a,b,c,d,e,f,g,h,i){
  for (b = [], d = 0, g = 128; d < a.length;) {
    c = a.charCodeAt(d++);
    if (c >= 0xD800 && c < 0xE000) {
      h = a.charCodeAt(d); // NaN past the end, which fails the pair test
      if (c < 0xDC00 && h >= 0xDC00 && h < 0xE000) {
        c = 0x10000 + ((c - 0xD800) << 10) + (h - 0xDC00);
        d++;
      } else {
        c = 0xFFFD;
      }
    }
    // f = number of continuation bytes that follow the lead byte.
    f = c < g ? 0 : c < 2048 ? 1 : c < 65536 ? 2 : 3;
    // 256 - (128 >> f) is the lead prefix 192 / 224 / 240.
    b.push(f ? 256 - (g >> f) + (c >> f * 6) : c);
    while (f--) {
      b.push(g + (c >> f * 6 & 63));
    }
  }
  return b;
}
// Encode the same sample with both implementations and show the results:
// "ret" receives the reference output, "ret2" the output of the compact version.
(function (sample) {
  document.getElementById("ret").innerHTML = stringToByteArrayOld(sample);
  document.getElementById("ret2").innerHTML = stringToByteArray(sample);
})("hello☺䭢 it works");
</script> |
Could save 4 bytes by doing "f=c<2048?1:..." instead of "c<2048?f=1:...". Also another 2 bytes if you do "i=f=c<2048..." and get rid of ",i=f"
Another byte: "65536" -> "g<<9"
@bytespider https://gist.github.com/1008764 the encodeURIComponent based version. It's 132 bytes in length (wrapped) and works well.
hey @bytespider, could you take the trailing comma out of your package.json
keywords?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can get it to 184bytes if we assume that we'll only have 4byte characters