Created
August 10, 2023 09:53
-
-
Save mped-oticon/8cdff3801365116c2360e6dff5d18f0a to your computer and use it in GitHub Desktop.
git: byte-trunc utf-8 line
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iconv.h>
#include <stdio.h>
#include <stddef.h>

#if 0
#include "utf8.h"
#endif

/* A single Unicode code point. */
typedef unsigned int ucs_char_t; /* assuming 32bit int */

/* Inclusive range [first, last] of code points; element type of lookup tables. */
struct interval {
	ucs_char_t first;
	ucs_char_t last;
};

/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that any array-valued expression indexes correctly.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
static ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) | |
{ | |
unsigned char *s = (unsigned char *)*start; | |
ucs_char_t ch; | |
size_t remainder, incr; | |
/* | |
* A caller that assumes NUL terminated text can choose | |
* not to bother with the remainder length. We will | |
* stop at the first NUL. | |
*/ | |
remainder = (remainder_p ? *remainder_p : 999); | |
if (remainder < 1) { | |
goto invalid; | |
} else if (*s < 0x80) { | |
/* 0xxxxxxx */ | |
ch = *s; | |
incr = 1; | |
} else if ((s[0] & 0xe0) == 0xc0) { | |
/* 110XXXXx 10xxxxxx */ | |
if (remainder < 2 || | |
(s[1] & 0xc0) != 0x80 || | |
(s[0] & 0xfe) == 0xc0) | |
goto invalid; | |
ch = ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); | |
incr = 2; | |
} else if ((s[0] & 0xf0) == 0xe0) { | |
/* 1110XXXX 10Xxxxxx 10xxxxxx */ | |
if (remainder < 3 || | |
(s[1] & 0xc0) != 0x80 || | |
(s[2] & 0xc0) != 0x80 || | |
/* overlong? */ | |
(s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || | |
/* surrogate? */ | |
(s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || | |
/* U+FFFE or U+FFFF? */ | |
(s[0] == 0xef && s[1] == 0xbf && | |
(s[2] & 0xfe) == 0xbe)) | |
goto invalid; | |
ch = ((s[0] & 0x0f) << 12) | | |
((s[1] & 0x3f) << 6) | (s[2] & 0x3f); | |
incr = 3; | |
} else if ((s[0] & 0xf8) == 0xf0) { | |
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ | |
if (remainder < 4 || | |
(s[1] & 0xc0) != 0x80 || | |
(s[2] & 0xc0) != 0x80 || | |
(s[3] & 0xc0) != 0x80 || | |
/* overlong? */ | |
(s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || | |
/* > U+10FFFF? */ | |
(s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) | |
goto invalid; | |
ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3f) << 12) | | |
((s[2] & 0x3f) << 6) | (s[3] & 0x3f); | |
incr = 4; | |
} else { | |
invalid: | |
*start = NULL; | |
return 0; | |
} | |
*start += incr; | |
if (remainder_p) | |
*remainder_p = remainder - incr; | |
return ch; | |
} | |
/* auxiliary function for binary search in interval table */ | |
static int bisearch(ucs_char_t ucs, const struct interval *table, int max) | |
{ | |
int min = 0; | |
int mid; | |
if (ucs < table[0].first || ucs > table[max].last) | |
return 0; | |
while (max >= min) { | |
mid = min + (max - min) / 2; | |
if (ucs > table[mid].last) | |
min = mid + 1; | |
else if (ucs < table[mid].first) | |
max = mid - 1; | |
else | |
return 1; | |
} | |
return 0; | |
} | |
/*
 * Return the number of terminal columns occupied by code point ch:
 * 0 for NUL and for entries of the zero_width table, -1 for control
 * characters, 2 for entries of the double_width table, and 1 for
 * everything else.
 */
static int git_wcwidth(ucs_char_t ch)
{
	/*
	 * Sorted lists of non-overlapping intervals.  The included header
	 * is presumably what defines the zero_width and double_width
	 * tables used below (it is not part of this file).
	 */
#include "unicode-width.h"

	/* test for 8-bit control characters */
	if (ch == 0)
		return 0;
	if (ch < 32 || (ch >= 0x7f && ch < 0xa0))
		return -1;

	/* binary search in table of non-spacing characters */
	if (bisearch(ch, zero_width, (int)ARRAY_SIZE(zero_width) - 1))
		return 0;

	/* binary search in table of double width characters */
	if (bisearch(ch, double_width, (int)ARRAY_SIZE(double_width) - 1))
		return 2;

	return 1;
}
/* | |
* This function returns the number of columns occupied by the character | |
* pointed to by the variable start. The pointer is updated to point at | |
* the next character. When remainder_p is not NULL, it points at the | |
* location that stores the number of remaining bytes we can use to pick | |
* a character (see pick_one_utf8_char() above). | |
*/ | |
int utf8_width(const char **start, size_t *remainder_p) | |
{ | |
ucs_char_t ch = pick_one_utf8_char(start, remainder_p); | |
if (!*start) | |
return 0; | |
return git_wcwidth(ch); | |
} | |
/*
 * Return the length in bytes of the longest prefix of line[0..len)
 * that consists only of complete, valid UTF-8 characters.  Scanning
 * stops at the first sequence that fails to decode, e.g. a character
 * cut off by the end of the buffer.
 */
static unsigned long sane_truncate_line(const char *line, unsigned long len)
{
	const char *cp = line;
	size_t remaining = len;

	while (remaining > 0) {
		/* Only the cursor/remaining-byte bookkeeping matters here. */
		(void) utf8_width(&cp, &remaining);
		if (!cp)
			break; /* truncated in the middle? */
	}
	/* remaining is only reduced for characters that decoded successfully. */
	return len - remaining;
}
/*
 * Return the largest byte count not exceeding max_bytes at which the
 * NUL terminated string line can be cut without splitting a UTF-8
 * encoded character.
 *
 * Characters are accepted one at a time; scanning stops at the
 * terminating NUL, at the first character that would not fit within
 * max_bytes, or at the first sequence that fails to decode.  In the
 * last case only the valid prefix seen so far is counted.
 */
static unsigned long byte_truncate_line(const char *line, size_t max_bytes)
{
	const char *cursor = line;
	size_t accepted = 0;

	while (*cursor) {
		size_t consumed;

		(void) pick_one_utf8_char(&cursor, NULL);
		/*
		 * The decoder reports failure by setting the cursor to NULL;
		 * stop before doing pointer arithmetic on it or
		 * dereferencing it.
		 */
		if (!cursor)
			break;

		/* cursor only ever moves forward, so the difference is non-negative. */
		consumed = (size_t)(cursor - line);
		if (consumed > max_bytes)
			break;
		accepted = consumed;
	}
	return accepted;
}
/*
 * Ad-hoc driver: for every byte budget from 0 to 19, print each sample
 * string truncated by byte_truncate_line() together with the budget and
 * the untruncated string, followed by a separator line.
 */
int main(int argc, char const *argv[])
{
	char *samples[] = {
		"hello world",		/* ASCII */
		"😀💩👍🏽",		/* Emoji */
		"∮∮∮ ∑∫∫ √−1",		/* Mathematical symbols */
		"السلام عليكم",		/* Arabic text */
		"नमस्ते",			/* Hindi text */
		"A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔",			/* Combining characters */
		"𠜎𠜱𠝹𠱓",		/* Outside the BMP (surrogate pairs in UTF-16) */
		"A️♂️️",			/* Variation selector */
		"️♀️♂️",			/* Zero width joiner */
		"А̣̣",			/* Was labelled "overlong encoding"; looks like ordinary text with combining marks */
		"😀�🏽",		/* Was labelled "invalid code points"; appears to hold a replacement character */
	};
	size_t s;
	int i;

	/* Command line arguments are not used. */
	(void)argc;
	(void)argv;

	for (i = 0; i < 20; ++i) {
		for (s = 0; s < sizeof(samples) / sizeof(samples[0]); ++s) {
			char *str = samples[s];

			/* "%.*s" prints only the accepted prefix of str. */
			printf("%.*s\t\t\t: is truncated to %i bytes of %s\n",
			       (int)byte_truncate_line(str, i), str, i, str);
		}
		printf("--------\n");
	}
	return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
eisbaw in kbnuxcsfw-mped in git on mped_bugfix_lockfile_maxname [?] via 🐍 v2.7.18 | |
❯ nix-shell --run 'gcc mped_adhoc_test.c && ./a.out' -p gettext -p tcl -p expat -p curl -p openssl -p zlib | |
: is truncated to 0 bytes of hello world | |
: is truncated to 0 bytes of 😀💩👍🏽 | |
: is truncated to 0 bytes of ∮∮∮ ∑∫∫ √−1 | |
: is truncated to 0 bytes of السلام عليكم | |
: is truncated to 0 bytes of नमस्ते | |
: is truncated to 0 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
: is truncated to 0 bytes of 𠜎𠜱𠝹𠱓 | |
: is truncated to 0 bytes of A️♂️️ | |
: is truncated to 0 bytes of ️♀️♂️ | |
: is truncated to 0 bytes of А̣̣ | |
: is truncated to 0 bytes of 😀�🏽 | |
-------- | |
h : is truncated to 1 bytes of hello world | |
: is truncated to 1 bytes of 😀💩👍🏽 | |
: is truncated to 1 bytes of ∮∮∮ ∑∫∫ √−1 | |
: is truncated to 1 bytes of السلام عليكم | |
: is truncated to 1 bytes of नमस्ते | |
A : is truncated to 1 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
: is truncated to 1 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 1 bytes of A️♂️️ | |
: is truncated to 1 bytes of ️♀️♂️ | |
: is truncated to 1 bytes of А̣̣ | |
: is truncated to 1 bytes of 😀�🏽 | |
-------- | |
he : is truncated to 2 bytes of hello world | |
: is truncated to 2 bytes of 😀💩👍🏽 | |
: is truncated to 2 bytes of ∮∮∮ ∑∫∫ √−1 | |
ا : is truncated to 2 bytes of السلام عليكم | |
: is truncated to 2 bytes of नमस्ते | |
A : is truncated to 2 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
: is truncated to 2 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 2 bytes of A️♂️️ | |
: is truncated to 2 bytes of ️♀️♂️ | |
А : is truncated to 2 bytes of А̣̣ | |
: is truncated to 2 bytes of 😀�🏽 | |
-------- | |
hel : is truncated to 3 bytes of hello world | |
: is truncated to 3 bytes of 😀💩👍🏽 | |
∮ : is truncated to 3 bytes of ∮∮∮ ∑∫∫ √−1 | |
ا : is truncated to 3 bytes of السلام عليكم | |
न : is truncated to 3 bytes of नमस्ते | |
A̷ : is truncated to 3 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
: is truncated to 3 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 3 bytes of A️♂️️ | |
: is truncated to 3 bytes of ️♀️♂️ | |
А : is truncated to 3 bytes of А̣̣ | |
: is truncated to 3 bytes of 😀�🏽 | |
-------- | |
hell : is truncated to 4 bytes of hello world | |
😀 : is truncated to 4 bytes of 😀💩👍🏽 | |
∮ : is truncated to 4 bytes of ∮∮∮ ∑∫∫ √−1 | |
ال : is truncated to 4 bytes of السلام عليكم | |
न : is truncated to 4 bytes of नमस्ते | |
A̷ : is truncated to 4 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎 : is truncated to 4 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 4 bytes of A️♂️️ | |
: is truncated to 4 bytes of ️♀️♂️ | |
А̣ : is truncated to 4 bytes of А̣̣ | |
😀 : is truncated to 4 bytes of 😀�🏽 | |
-------- | |
hello : is truncated to 5 bytes of hello world | |
😀 : is truncated to 5 bytes of 😀💩👍🏽 | |
∮ : is truncated to 5 bytes of ∮∮∮ ∑∫∫ √−1 | |
ال : is truncated to 5 bytes of السلام عليكم | |
न : is truncated to 5 bytes of नमस्ते | |
A̷͎ : is truncated to 5 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎 : is truncated to 5 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 5 bytes of A️♂️️ | |
: is truncated to 5 bytes of ️♀️♂️ | |
А̣ : is truncated to 5 bytes of А̣̣ | |
😀 : is truncated to 5 bytes of 😀�🏽 | |
-------- | |
hello : is truncated to 6 bytes of hello world | |
😀 : is truncated to 6 bytes of 😀💩👍🏽 | |
∮∮ : is truncated to 6 bytes of ∮∮∮ ∑∫∫ √−1 | |
الس : is truncated to 6 bytes of السلام عليكم | |
नम : is truncated to 6 bytes of नमस्ते | |
A̷͎ : is truncated to 6 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎 : is truncated to 6 bytes of 𠜎𠜱𠝹𠱓 | |
A : is truncated to 6 bytes of A️♂️️ | |
: is truncated to 6 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 6 bytes of А̣̣ | |
😀 : is truncated to 6 bytes of 😀�🏽 | |
-------- | |
hello w : is truncated to 7 bytes of hello world | |
😀 : is truncated to 7 bytes of 😀💩👍🏽 | |
∮∮ : is truncated to 7 bytes of ∮∮∮ ∑∫∫ √−1 | |
الس : is truncated to 7 bytes of السلام عليكم | |
नम : is truncated to 7 bytes of नमस्ते | |
A̷͎̝ : is truncated to 7 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎 : is truncated to 7 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 7 bytes of A️♂️️ | |
: is truncated to 7 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 7 bytes of А̣̣ | |
😀� : is truncated to 7 bytes of 😀�🏽 | |
-------- | |
hello wo : is truncated to 8 bytes of hello world | |
😀💩 : is truncated to 8 bytes of 😀💩👍🏽 | |
∮∮ : is truncated to 8 bytes of ∮∮∮ ∑∫∫ √−1 | |
السل : is truncated to 8 bytes of السلام عليكم | |
नम : is truncated to 8 bytes of नमस्ते | |
A̷͎̝ : is truncated to 8 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱 : is truncated to 8 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 8 bytes of A️♂️️ | |
: is truncated to 8 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 8 bytes of А̣̣ | |
😀� : is truncated to 8 bytes of 😀�🏽 | |
-------- | |
hello wor : is truncated to 9 bytes of hello world | |
😀💩 : is truncated to 9 bytes of 😀💩👍🏽 | |
∮∮∮ : is truncated to 9 bytes of ∮∮∮ ∑∫∫ √−1 | |
السل : is truncated to 9 bytes of السلام عليكم | |
नमस : is truncated to 9 bytes of नमस्ते | |
A̷͎̝͕ : is truncated to 9 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱 : is truncated to 9 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 9 bytes of A️♂️️ | |
️ : is truncated to 9 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 9 bytes of А̣̣ | |
😀� : is truncated to 9 bytes of 😀�🏽 | |
-------- | |
hello worl : is truncated to 10 bytes of hello world | |
😀💩 : is truncated to 10 bytes of 😀💩👍🏽 | |
∮∮∮ : is truncated to 10 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلا : is truncated to 10 bytes of السلام عليكم | |
नमस : is truncated to 10 bytes of नमस्ते | |
A̷͎̝͕ : is truncated to 10 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱 : is truncated to 10 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 10 bytes of A️♂️️ | |
️ : is truncated to 10 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 10 bytes of А̣̣ | |
😀� : is truncated to 10 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 11 bytes of hello world | |
😀💩 : is truncated to 11 bytes of 😀💩👍🏽 | |
∮∮∮ : is truncated to 11 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلا : is truncated to 11 bytes of السلام عليكم | |
नमस : is truncated to 11 bytes of नमस्ते | |
A̷͎̝͕͑ : is truncated to 11 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱 : is truncated to 11 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 11 bytes of A️♂️️ | |
️ : is truncated to 11 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 11 bytes of А̣̣ | |
😀�🏽 : is truncated to 11 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 12 bytes of hello world | |
😀💩👍 : is truncated to 12 bytes of 😀💩👍🏽 | |
∮∮∮ : is truncated to 12 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام : is truncated to 12 bytes of السلام عليكم | |
नमस् : is truncated to 12 bytes of नमस्ते | |
A̷͎̝͕͑ : is truncated to 12 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹 : is truncated to 12 bytes of 𠜎𠜱𠝹𠱓 | |
A️ : is truncated to 12 bytes of A️♂️️ | |
️ : is truncated to 12 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 12 bytes of А̣̣ | |
😀�🏽 : is truncated to 12 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 13 bytes of hello world | |
😀💩👍 : is truncated to 13 bytes of 😀💩👍🏽 | |
∮∮∮ ∑ : is truncated to 13 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام : is truncated to 13 bytes of السلام عليكم | |
नमस् : is truncated to 13 bytes of नमस्ते | |
A̷͎̝͕͑͛ : is truncated to 13 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹 : is truncated to 13 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂ : is truncated to 13 bytes of A️♂️️ | |
️ : is truncated to 13 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 13 bytes of А̣̣ | |
😀�🏽 : is truncated to 13 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 14 bytes of hello world | |
😀💩👍 : is truncated to 14 bytes of 😀💩👍🏽 | |
∮∮∮ ∑ : is truncated to 14 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام : is truncated to 14 bytes of السلام عليكم | |
नमस् : is truncated to 14 bytes of नमस्ते | |
A̷͎̝͕͑͛ : is truncated to 14 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹 : is truncated to 14 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂ : is truncated to 14 bytes of A️♂️️ | |
️ : is truncated to 14 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 14 bytes of А̣̣ | |
😀�🏽 : is truncated to 14 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 15 bytes of hello world | |
😀💩👍 : is truncated to 15 bytes of 😀💩👍🏽 | |
∮∮∮ ∑ : is truncated to 15 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام ع : is truncated to 15 bytes of السلام عليكم | |
नमस्त : is truncated to 15 bytes of नमस्ते | |
A̷͎̝͕͑͛̓ : is truncated to 15 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹 : is truncated to 15 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂ : is truncated to 15 bytes of A️♂️️ | |
️ : is truncated to 15 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 15 bytes of А̣̣ | |
😀�🏽 : is truncated to 15 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 16 bytes of hello world | |
😀💩👍🏽 : is truncated to 16 bytes of 😀💩👍🏽 | |
∮∮∮ ∑∫ : is truncated to 16 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام ع : is truncated to 16 bytes of السلام عليكم | |
नमस्त : is truncated to 16 bytes of नमस्ते | |
A̷͎̝͕͑͛̓ : is truncated to 16 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹𠱓 : is truncated to 16 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂️ : is truncated to 16 bytes of A️♂️️ | |
️ : is truncated to 16 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 16 bytes of А̣̣ | |
😀�🏽 : is truncated to 16 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 17 bytes of hello world | |
😀💩👍🏽 : is truncated to 17 bytes of 😀💩👍🏽 | |
∮∮∮ ∑∫ : is truncated to 17 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام عل : is truncated to 17 bytes of السلام عليكم | |
नमस्त : is truncated to 17 bytes of नमस्ते | |
A̷͎̝͕͑͛̓Ȃ : is truncated to 17 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹𠱓 : is truncated to 17 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂️ : is truncated to 17 bytes of A️♂️️ | |
️ : is truncated to 17 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 17 bytes of А̣̣ | |
😀�🏽 : is truncated to 17 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 18 bytes of hello world | |
😀💩👍🏽 : is truncated to 18 bytes of 😀💩👍🏽 | |
∮∮∮ ∑∫ : is truncated to 18 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام عل : is truncated to 18 bytes of السلام عليكم | |
नमस्ते : is truncated to 18 bytes of नमस्ते | |
A̷͎̝͕͑͛̓Ȃ : is truncated to 18 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹𠱓 : is truncated to 18 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂️ : is truncated to 18 bytes of A️♂️️ | |
️♀ : is truncated to 18 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 18 bytes of А̣̣ | |
😀�🏽 : is truncated to 18 bytes of 😀�🏽 | |
-------- | |
hello world : is truncated to 19 bytes of hello world | |
😀💩👍🏽 : is truncated to 19 bytes of 😀💩👍🏽 | |
∮∮∮ ∑∫∫ : is truncated to 19 bytes of ∮∮∮ ∑∫∫ √−1 | |
السلام علي : is truncated to 19 bytes of السلام عليكم | |
नमस्ते : is truncated to 19 bytes of नमस्ते | |
A̷͎̝͕͑͛̓Ȃ̵ : is truncated to 19 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔ | |
𠜎𠜱𠝹𠱓 : is truncated to 19 bytes of 𠜎𠜱𠝹𠱓 | |
A️♂️️ : is truncated to 19 bytes of A️♂️️ | |
️♀ : is truncated to 19 bytes of ️♀️♂️ | |
А̣̣ : is truncated to 19 bytes of А̣̣ | |
😀�🏽 : is truncated to 19 bytes of 😀�🏽 | |
-------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment