lihnux/StringToByteArray.js

techird · 2018-07-31T08:36:05Z

This is not a byte array; it is a list of char codes.

techird · 2018-07-31T08:36:33Z

/**
 * Splits every UTF-16 code unit of a string into two bytes, high byte first.
 *
 * @param {string} str - The string to unpack.
 * @returns {number[]} Two byte values (0-255) per code unit of `str`.
 */
function unpack(str) {
    const result = [];
    for (let pos = 0; pos < str.length; pos += 1) {
        const unit = str.charCodeAt(pos);
        // High byte first, then low byte.
        result.push(unit >>> 8, unit & 0xFF);
    }
    return result;
}

untilbit · 2018-07-31T11:53:50Z

@techird Does your code produce a char code list or a byte array?

dinigo · 2018-10-29T17:43:51Z

I think this might also work

[...Buffer.from('hello world')]

Insidexa · 2018-11-19T19:19:38Z

@techird it only works for Latin characters.
Use this too (the code is not mine):

/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * A valid surrogate pair is combined into a single code point and emitted as
 * a 4-byte sequence. An unpaired surrogate (a high surrogate not followed by
 * a low surrogate, or a low surrogate on its own) cannot be represented in
 * UTF-8, so it is replaced by U+FFFD (bytes EF BF BD) without consuming the
 * following character.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes (each 0-255) of `str`.
 */
function toUTF8Array(str) {
    const utf8 = [];
    for (let i = 0; i < str.length; i++) {
        let charcode = str.charCodeAt(i);

        if (charcode >= 0xd800 && charcode < 0xe000) {
            // Surrogate range: only a high surrogate (D800-DBFF) immediately
            // followed by a low surrogate (DC00-DFFF) forms a valid pair.
            const next = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;
            if (charcode < 0xdc00 && next >= 0xdc00 && next < 0xe000) {
                i++;
                // UTF-16 encodes 0x10000-0x10FFFF by subtracting 0x10000 and
                // splitting the 20 bits of 0x0-0xFFFFF into two halves.
                charcode = 0x10000 + (((charcode & 0x3ff) << 10) | (next & 0x3ff));
            } else {
                // Unpaired surrogate: substitute the replacement character.
                charcode = 0xfffd;
            }
        }

        if (charcode < 0x80) {
            utf8.push(charcode);
        } else if (charcode < 0x800) {
            utf8.push(0xc0 | (charcode >> 6),
                      0x80 | (charcode & 0x3f));
        } else if (charcode < 0x10000) {
            utf8.push(0xe0 | (charcode >> 12),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
        } else {
            utf8.push(0xf0 | (charcode >> 18),
                      0x80 | ((charcode >> 12) & 0x3f),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
        }
    }
    return utf8;
}

Insidexa · 2018-11-19T19:21:38Z

@dinigo yes, works, equal to java str.getBytes(Charsets.UTF_8)

icecraft · 2019-12-03T11:39:51Z

@Insidexa Good job!

Gh0u1L5 · 2020-09-21T05:41:08Z

@Insidexa IMO, this code converts the char codes from UTF-16 to UTF-8, because the default internal encoding of JS strings is UTF-16. However, if all I need is a UTF-16 byte array, I don't need so many complex checks and bit operations.

eduardoroliveira · 2021-01-28T00:45:35Z

also Array.from("111122222333344444555")

eduardoroliveira · 2021-01-28T00:53:55Z

if you want to convert to array of numbers you could use

Array.from("1111222223333444445556", (x) => Number(x))

ellcs · 2021-03-14T17:47:12Z

if you want to convert to array of numbers you could use
Array.from("1111222223333444445556", (x) => Number(x))

Array.from("\x00", (x) => Number(x))

results in [NaN]!

If you handle raw bytes in the 0..255 range, it is better to use a slightly different version with charCodeAt. I didn't test how it behaves with Unicode characters.

// Map each character of the string to the char code of its first UTF-16 unit.
Array.from("1111222223333444445556", (x) => x.charCodeAt(0));

paolobertani · 2022-08-26T15:10:57Z

/**
 * Converts a string into a flat list of bytes, emitting two bytes
 * (high byte, then low byte) for each UTF-16 code unit.
 *
 * @param {string} str - The string whose code units are split.
 * @returns {number[]} Byte values in the range 0-255, twice as many as `str.length`.
 */
function unpack(str) {
    const out = [];
    let index = 0;
    while (index < str.length) {
        const codeUnit = str.charCodeAt(index);
        const highByte = codeUnit >>> 8;
        const lowByte = codeUnit & 0xFF;
        out.push(highByte);
        out.push(lowByte);
        index++;
    }
    return out;
}

this is the correct way to extract the bytes a JavaScript string is made of

String.charCodeAt() returns a 16-bit unsigned integer; it must be split into two bytes if it exceeds 0xff

	// Collect the UTF-16 char code of every position in `url` into `data`.
	var url = "Hello World";
	var data = [];
	var i = 0;
	while (i < url.length) {
		data.push(url.charCodeAt(i));
		i++;
	}

lihnux/StringToByteArray.js

techird commented Jul 31, 2018

Uh oh!

techird commented Jul 31, 2018 •

edited

Loading

Uh oh!

untilbit commented Jul 31, 2018

Uh oh!

dinigo commented Oct 29, 2018

Uh oh!

Insidexa commented Nov 19, 2018

Uh oh!

Insidexa commented Nov 19, 2018

Uh oh!

icecraft commented Dec 3, 2019

Uh oh!

Gh0u1L5 commented Sep 21, 2020

Uh oh!

eduardoroliveira commented Jan 28, 2021

Uh oh!

eduardoroliveira commented Jan 28, 2021

Uh oh!

ellcs commented Mar 14, 2021

Uh oh!

paolobertani commented Aug 26, 2022

Uh oh!

lihnux/StringToByteArray.js

techird commented Jul 31, 2018

Uh oh!

techird commented Jul 31, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

untilbit commented Jul 31, 2018

Uh oh!

dinigo commented Oct 29, 2018

Uh oh!

Insidexa commented Nov 19, 2018

Uh oh!

Insidexa commented Nov 19, 2018

Uh oh!

icecraft commented Dec 3, 2019

Uh oh!

Gh0u1L5 commented Sep 21, 2020

Uh oh!

eduardoroliveira commented Jan 28, 2021

Uh oh!

eduardoroliveira commented Jan 28, 2021

Uh oh!

ellcs commented Mar 14, 2021

Uh oh!

paolobertani commented Aug 26, 2022

Uh oh!

techird commented Jul 31, 2018 •

edited

Loading