-
-
Save lihnux/2aa4a6f5a9170974f6aa to your computer and use it in GitHub Desktop.
// Collect the UTF-16 code unit of every character in `url`.
// The result is a list of char codes (each 0..65535), not a byte array.
const url = "Hello World";
const data = [];
for (let i = 0; i < url.length; i += 1) {
  data.push(url.charCodeAt(i));
}
/**
 * Splits every UTF-16 code unit of a string into two bytes.
 *
 * For each code unit the high byte is emitted first, followed by the low
 * byte, so the result always has `2 * str.length` entries.
 *
 * @param {string} str - The string to unpack.
 * @returns {number[]} Bytes (0..255), two per UTF-16 code unit.
 */
function unpack(str) {
  const result = [];
  let index = 0;
  while (index < str.length) {
    const unit = str.charCodeAt(index);
    // High byte first, then low byte.
    result.push(unit >>> 8, unit & 0xff);
    index += 1;
  }
  return result;
}
@techird Does your code produce a char code list or a byte array?
I think this might also work
[...Buffer.from('hello world')]
@techird it's only Latin
Use this too; the code is not mine.
/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 bytes.
 *
 * Well-formed surrogate pairs are combined into a single code point and
 * written as a 4-byte sequence. Unpaired surrogates (a high surrogate not
 * followed by a low one, or a stray low surrogate) are replaced with
 * U+FFFD (bytes EF BF BD), which is what `TextEncoder` does as well.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes (each 0..255).
 */
function toUTF8Array(str) {
  const utf8 = [];
  for (let i = 0; i < str.length; i++) {
    const charcode = str.charCodeAt(i);
    if (charcode < 0x80) {
      // 1 byte: ASCII.
      utf8.push(charcode);
    } else if (charcode < 0x800) {
      // 2 bytes.
      utf8.push(0xc0 | (charcode >> 6),
                0x80 | (charcode & 0x3f));
    } else if (charcode < 0xd800 || charcode >= 0xe000) {
      // 3 bytes: rest of the BMP outside the surrogate range.
      utf8.push(0xe0 | (charcode >> 12),
                0x80 | ((charcode >> 6) & 0x3f),
                0x80 | (charcode & 0x3f));
    } else {
      // Surrogate range: only a high surrogate (0xD800-0xDBFF) immediately
      // followed by a low surrogate (0xDC00-0xDFFF) forms a valid pair.
      const isHigh = charcode < 0xdc00;
      const next = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;
      const nextIsLow = next >= 0xdc00 && next <= 0xdfff;
      if (isHigh && nextIsLow) {
        i++; // consume the low surrogate as well
        // UTF-16 encodes 0x10000-0x10FFFF by subtracting 0x10000 and
        // splitting the remaining 20 bits into two 10-bit halves.
        const codePoint = 0x10000 + (((charcode & 0x3ff) << 10)
                                     | (next & 0x3ff));
        utf8.push(0xf0 | (codePoint >> 18),
                  0x80 | ((codePoint >> 12) & 0x3f),
                  0x80 | ((codePoint >> 6) & 0x3f),
                  0x80 | (codePoint & 0x3f));
      } else {
        // Unpaired surrogate: emit U+FFFD and do not swallow the next unit.
        utf8.push(0xef, 0xbf, 0xbd);
      }
    }
  }
  return utf8;
}
@dinigo yes, works, equal to java str.getBytes(Charsets.UTF_8)
@Insidexa Good job !
@Insidexa IMO, this code converts the char codes from UTF-16 to UTF-8, because the default internal encoding of JS strings is UTF-16. However, if all I need is a UTF-16 byte array, I don't need so many complex checks and bit operations.
also Array.from("111122222333344444555")
if you want to convert to array of numbers you could use
Array.from("1111222223333444445556", (x) => Number(x))
if you want to convert to array of numbers you could use
Array.from("1111222223333444445556", (x) => Number(x))
Array.from("\x00", (x) => Number(x))
results in [NaN]!
If you handle raw bytes in a 0..255 space, it is better to use a slightly different version with charCodeAt.
I didn't test how it behaves with Unicode characters.
Array.from("1111222223333444445556", (x) => x.charCodeAt(0))
/**
 * Converts a string into the bytes of its UTF-16 code units.
 *
 * Each code unit contributes two entries: its upper 8 bits, then its
 * lower 8 bits.
 *
 * @param {string} str - The string to unpack.
 * @returns {number[]} Two bytes (0..255) per UTF-16 code unit.
 */
function unpack(str) {
  const out = [];
  for (let pos = 0; pos < str.length; pos += 1) {
    const code = str.charCodeAt(pos);
    const hi = code >>> 8;
    const lo = code & 0xff;
    out.push(hi);
    out.push(lo);
  }
  return out;
}
this is the correct way to extract the bytes a JavaScript string is made of
String.charCodeAt() returns a 16-bit unsigned integer; it must be split into two bytes if it exceeds 0xff.
This is not byte array, this is char code list.