Skip to content

Instantly share code, notes, and snippets.

@mped-oticon
Created August 10, 2023 09:53
Show Gist options
  • Save mped-oticon/8cdff3801365116c2360e6dff5d18f0a to your computer and use it in GitHub Desktop.
Save mped-oticon/8cdff3801365116c2360e6dff5d18f0a to your computer and use it in GitHub Desktop.
git: byte-trunc utf-8 line
#include <iconv.h>
#include <stdio.h>
#include <stddef.h>
#if 0
#include "utf8.h"
#endif
/* A single decoded Unicode code point, as returned by pick_one_utf8_char(). */
typedef unsigned int ucs_char_t; /* assuming 32bit int */
/*
 * One closed range [first, last] of code points. bisearch() treats a
 * code point as a member when first <= cp <= last.
 */
struct interval {
ucs_char_t first; /* lowest code point in the range (inclusive) */
ucs_char_t last; /* highest code point in the range (inclusive) */
};
/*
 * Number of elements in the array x. Only meaningful when x is a real
 * array (not a pointer or an array-typed function parameter). The
 * argument is parenthesized before indexing so that an expression
 * argument cannot rebind to the subscript operator.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Decode the UTF-8 sequence at *start and return its code point.
 *
 * start       - in/out: on success advanced past the decoded sequence;
 *               set to NULL when the bytes are rejected.
 * remainder_p - optional in/out count of bytes available at *start; on
 *               success it is reduced by the number of bytes consumed,
 *               on rejection it is left untouched. When NULL, 999 bytes
 *               are assumed to be available; a sequence cut short by a
 *               NUL is still rejected because NUL is not a continuation
 *               byte. Note that a NUL lead byte itself decodes as code
 *               point 0 and is consumed, so callers working on
 *               NUL-terminated text must test for it themselves.
 *
 * Rejected (returns 0 with *start == NULL): empty input, stray
 * continuation or 0xf8+ lead bytes, truncated sequences, overlong
 * forms, UTF-16 surrogates, U+FFFE/U+FFFF and values above U+10FFFF.
 */
static ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p)
{
	const unsigned char *b = (const unsigned char *)*start;
	size_t left = remainder_p ? *remainder_p : 999;
	size_t len, i;
	ucs_char_t cp;

	if (left < 1)
		goto invalid;

	/* The lead byte alone determines the sequence length. */
	if (b[0] < 0x80)
		len = 1;		/* 0xxxxxxx */
	else if ((b[0] & 0xe0) == 0xc0)
		len = 2;		/* 110XXXXx 10xxxxxx */
	else if ((b[0] & 0xf0) == 0xe0)
		len = 3;		/* 1110XXXX 10Xxxxxx 10xxxxxx */
	else if ((b[0] & 0xf8) == 0xf0)
		len = 4;		/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
	else
		goto invalid;

	if (left < len)
		goto invalid;

	/*
	 * Check trailing bytes strictly in order and stop at the first
	 * bad one, so we never look beyond a terminating NUL.
	 */
	for (i = 1; i < len; i++) {
		if ((b[i] & 0xc0) != 0x80)
			goto invalid;
	}

	switch (len) {
	case 1:
		cp = b[0];
		break;
	case 2:
		/* 0xc0 and 0xc1 can only start overlong encodings */
		if ((b[0] & 0xfe) == 0xc0)
			goto invalid;
		cp = ((b[0] & 0x1f) << 6) | (b[1] & 0x3f);
		break;
	case 3:
		/* overlong? */
		if (b[0] == 0xe0 && (b[1] & 0xe0) == 0x80)
			goto invalid;
		/* surrogate? */
		if (b[0] == 0xed && (b[1] & 0xe0) == 0xa0)
			goto invalid;
		/* U+FFFE or U+FFFF? */
		if (b[0] == 0xef && b[1] == 0xbf && (b[2] & 0xfe) == 0xbe)
			goto invalid;
		cp = ((b[0] & 0x0f) << 12) |
		     ((b[1] & 0x3f) << 6) | (b[2] & 0x3f);
		break;
	default: /* len == 4 */
		/* overlong? */
		if (b[0] == 0xf0 && (b[1] & 0xf0) == 0x80)
			goto invalid;
		/* > U+10FFFF? */
		if (b[0] > 0xf4 || (b[0] == 0xf4 && b[1] > 0x8f))
			goto invalid;
		cp = ((b[0] & 0x07) << 18) | ((b[1] & 0x3f) << 12) |
		     ((b[2] & 0x3f) << 6) | (b[3] & 0x3f);
		break;
	}

	*start += len;
	if (remainder_p)
		*remainder_p = left - len;
	return cp;

invalid:
	*start = NULL;
	return 0;
}
/*
 * Auxiliary function for binary search in an interval table.
 *
 * ucs   - code point to look up.
 * table - intervals to search; the halving search below only works if
 *         they are sorted in ascending order and do not overlap.
 * max   - index of the last element of table (element count minus one).
 *
 * Returns 1 when ucs lies inside one of the intervals, 0 otherwise.
 * A NULL table or a negative max (an empty table) yields 0 instead of
 * indexing out of bounds.
 */
static int bisearch(ucs_char_t ucs, const struct interval *table, int max)
{
	int min = 0;

	/* nothing to search: never touch table[0] or table[max] */
	if (!table || max < 0)
		return 0;

	/* quick reject for values outside the whole table */
	if (ucs < table[0].first || ucs > table[max].last)
		return 0;

	while (max >= min) {
		/* written this way so that min + max cannot overflow */
		int mid = min + (max - min) / 2;

		if (ucs > table[mid].last)
			min = mid + 1;
		else if (ucs < table[mid].first)
			max = mid - 1;
		else
			return 1;
	}
	return 0;
}
/*
 * Number of terminal columns taken by code point ch:
 *   0  for NUL and for anything found in the zero_width table,
 *  -1  for control characters (below 0x20, or 0x7f through 0x9f),
 *   2  for anything found in the double_width table,
 *   1  for everything else.
 */
static int git_wcwidth(ucs_char_t ch)
{
	/*
	 * The zero_width and double_width tables used below come from
	 * this header (not visible here; presumably sorted arrays of
	 * non-overlapping struct interval entries, as bisearch() needs).
	 */
#include "unicode-width.h"

	if (!ch)
		return 0;

	/* control characters have no printable width */
	if (ch < 0x20 || (0x7f <= ch && ch <= 0x9f))
		return -1;

	/* non-spacing characters */
	if (bisearch(ch, zero_width, ARRAY_SIZE(zero_width) - 1))
		return 0;

	/* double width characters, otherwise a plain single column */
	return bisearch(ch, double_width, ARRAY_SIZE(double_width) - 1) ? 2 : 1;
}
/*
 * Return the column width of the character that *start points at and
 * move *start to the following character. If the bytes cannot be
 * decoded, *start becomes NULL and 0 is returned.
 *
 * remainder_p, when not NULL, holds the number of bytes that may be
 * examined and is updated by pick_one_utf8_char().
 */
int utf8_width(const char **start, size_t *remainder_p)
{
	ucs_char_t decoded = pick_one_utf8_char(start, remainder_p);

	return *start ? git_wcwidth(decoded) : 0;
}
/*
 * Return how many of the first len bytes of line were consumed as
 * complete, decodable characters before either the buffer ran out or
 * utf8_width() rejected a sequence (for instance because the buffer
 * ends in the middle of a character).
 */
static unsigned long sane_truncate_line(char *line, unsigned long len)
{
	const char *pos = line;
	size_t left = len;

	while (left > 0) {
		(void) utf8_width(&pos, &left);
		if (pos == NULL)
			break; /* truncated in the middle? */
	}
	/* left is only decremented for characters that decoded cleanly */
	return len - left;
}
/*
 * Return the length in bytes of the longest prefix of the
 * NUL-terminated string line that is at most max_bytes long and does
 * not end in the middle of a UTF-8 encoded code point.
 *
 * The cut is made on code point boundaries only; combining marks and
 * joiners may still be separated from their base character.
 *
 * Scanning stops at the first sequence pick_one_utf8_char() rejects,
 * so for malformed input only the well-formed part before it counts.
 */
static unsigned long byte_truncate_line(char *line, size_t max_bytes)
{
	const char *cursor = line;
	size_t accepted = 0;

	while (*cursor) {
		size_t consumed;

		(void) pick_one_utf8_char(&cursor, NULL);
		/*
		 * The decoder reports bad input by setting cursor to NULL;
		 * bail out before doing pointer arithmetic on it.
		 */
		if (!cursor)
			break;

		consumed = (size_t)(cursor - line);
		if (consumed > max_bytes)
			break;
		accepted = consumed;
	}
	return accepted;
}
/*
 * Ad-hoc driver: for every byte budget from 0 to 19, print each sample
 * string cut down with byte_truncate_line() next to the original, with
 * a separator line after each round.
 */
int main(int argc, char const *argv[])
{
	char *str_plain = "hello world"; /* ASCII */
	char *str_emoji = "😀💩👍🏽"; /* Emoji, 4 bytes each */
	char *str_mathematical = "∮∮∮ ∑∫∫ √−1"; /* Mathematical symbols */
	char *str_arabic = "السلام عليكم"; /* Arabic text */
	char *str_hindi = "नमस्ते"; /* Hindi text */
	char *str_combining = "A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔"; /* Combining characters */
	char *str_surrogate = "𠜎𠜱𠝹𠱓"; /* Non-BMP ideographs: surrogate pairs in UTF-16, 4 bytes each in UTF-8 */
	char *str_variation = "A‍️‍♂️️"; /* Zero width joiners and variation selectors */
	char *str_zero = "​‍️​‍♀️​‍♂️​"; /* Zero width spaces and joiners */
	char *str_overlong = "А̣̣"; /* Cyrillic A with two combining dots; well-formed, not actually overlong */
	char *str_invalid = "😀�🏽"; /* U+FFFD between emoji; well-formed, not actually invalid */

	/* not a command line tool; silence unused-parameter warnings */
	(void)argc;
	(void)argv;

/* Print str cut to at most n bytes, followed by n and the full string. */
#define FOO(str, n) \
	do { \
		printf("%.*s\t\t\t: is truncated to %i bytes of %s\n", \
		       (int)byte_truncate_line((str), (n)), (str), (n), (str)); \
	} while (0)

	for (int i = 0; i < 20; ++i) {
		FOO(str_plain, i);
		FOO(str_emoji, i);
		FOO(str_mathematical, i);
		FOO(str_arabic, i);
		FOO(str_hindi, i);
		FOO(str_combining, i);
		FOO(str_surrogate, i);
		FOO(str_variation, i);
		FOO(str_zero, i);
		FOO(str_overlong, i);
		FOO(str_invalid, i);
		printf("--------\n");
	}
	return 0;
}
eisbaw in kbnuxcsfw-mped in git on  mped_bugfix_lockfile_maxname [?] via 🐍 v2.7.18
❯ nix-shell --run 'gcc mped_adhoc_test.c && ./a.out' -p gettext -p tcl -p expat -p curl -p openssl -p zlib
: is truncated to 0 bytes of hello world
: is truncated to 0 bytes of 😀💩👍🏽
: is truncated to 0 bytes of ∮∮∮ ∑∫∫ √−1
: is truncated to 0 bytes of السلام عليكم
: is truncated to 0 bytes of नमस्ते
: is truncated to 0 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
: is truncated to 0 bytes of 𠜎𠜱𠝹𠱓
: is truncated to 0 bytes of A‍️‍♂️️
: is truncated to 0 bytes of ​‍️​‍♀️​‍♂️​
: is truncated to 0 bytes of А̣̣
: is truncated to 0 bytes of 😀�🏽
--------
h : is truncated to 1 bytes of hello world
: is truncated to 1 bytes of 😀💩👍🏽
: is truncated to 1 bytes of ∮∮∮ ∑∫∫ √−1
: is truncated to 1 bytes of السلام عليكم
: is truncated to 1 bytes of नमस्ते
A : is truncated to 1 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
: is truncated to 1 bytes of 𠜎𠜱𠝹𠱓
A : is truncated to 1 bytes of A‍️‍♂️️
: is truncated to 1 bytes of ​‍️​‍♀️​‍♂️​
: is truncated to 1 bytes of А̣̣
: is truncated to 1 bytes of 😀�🏽
--------
he : is truncated to 2 bytes of hello world
: is truncated to 2 bytes of 😀💩👍🏽
: is truncated to 2 bytes of ∮∮∮ ∑∫∫ √−1
ا : is truncated to 2 bytes of السلام عليكم
: is truncated to 2 bytes of नमस्ते
A : is truncated to 2 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
: is truncated to 2 bytes of 𠜎𠜱𠝹𠱓
A : is truncated to 2 bytes of A‍️‍♂️️
: is truncated to 2 bytes of ​‍️​‍♀️​‍♂️​
А : is truncated to 2 bytes of А̣̣
: is truncated to 2 bytes of 😀�🏽
--------
hel : is truncated to 3 bytes of hello world
: is truncated to 3 bytes of 😀💩👍🏽
∮ : is truncated to 3 bytes of ∮∮∮ ∑∫∫ √−1
ا : is truncated to 3 bytes of السلام عليكم
न : is truncated to 3 bytes of नमस्ते
A̷ : is truncated to 3 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
: is truncated to 3 bytes of 𠜎𠜱𠝹𠱓
A : is truncated to 3 bytes of A‍️‍♂️️
​ : is truncated to 3 bytes of ​‍️​‍♀️​‍♂️​
А : is truncated to 3 bytes of А̣̣
: is truncated to 3 bytes of 😀�🏽
--------
hell : is truncated to 4 bytes of hello world
😀 : is truncated to 4 bytes of 😀💩👍🏽
∮ : is truncated to 4 bytes of ∮∮∮ ∑∫∫ √−1
ال : is truncated to 4 bytes of السلام عليكم
न : is truncated to 4 bytes of नमस्ते
A̷ : is truncated to 4 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎 : is truncated to 4 bytes of 𠜎𠜱𠝹𠱓
A‍ : is truncated to 4 bytes of A‍️‍♂️️
​ : is truncated to 4 bytes of ​‍️​‍♀️​‍♂️​
А̣ : is truncated to 4 bytes of А̣̣
😀 : is truncated to 4 bytes of 😀�🏽
--------
hello : is truncated to 5 bytes of hello world
😀 : is truncated to 5 bytes of 😀💩👍🏽
∮ : is truncated to 5 bytes of ∮∮∮ ∑∫∫ √−1
ال : is truncated to 5 bytes of السلام عليكم
न : is truncated to 5 bytes of नमस्ते
A̷͎ : is truncated to 5 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎 : is truncated to 5 bytes of 𠜎𠜱𠝹𠱓
A‍ : is truncated to 5 bytes of A‍️‍♂️️
​ : is truncated to 5 bytes of ​‍️​‍♀️​‍♂️​
А̣ : is truncated to 5 bytes of А̣̣
😀 : is truncated to 5 bytes of 😀�🏽
--------
hello : is truncated to 6 bytes of hello world
😀 : is truncated to 6 bytes of 😀💩👍🏽
∮∮ : is truncated to 6 bytes of ∮∮∮ ∑∫∫ √−1
الس : is truncated to 6 bytes of السلام عليكم
नम : is truncated to 6 bytes of नमस्ते
A̷͎ : is truncated to 6 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎 : is truncated to 6 bytes of 𠜎𠜱𠝹𠱓
A‍ : is truncated to 6 bytes of A‍️‍♂️️
​‍ : is truncated to 6 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 6 bytes of А̣̣
😀 : is truncated to 6 bytes of 😀�🏽
--------
hello w : is truncated to 7 bytes of hello world
😀 : is truncated to 7 bytes of 😀💩👍🏽
∮∮ : is truncated to 7 bytes of ∮∮∮ ∑∫∫ √−1
الس : is truncated to 7 bytes of السلام عليكم
नम : is truncated to 7 bytes of नमस्ते
A̷͎̝ : is truncated to 7 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎 : is truncated to 7 bytes of 𠜎𠜱𠝹𠱓
A‍️ : is truncated to 7 bytes of A‍️‍♂️️
​‍ : is truncated to 7 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 7 bytes of А̣̣
😀� : is truncated to 7 bytes of 😀�🏽
--------
hello wo : is truncated to 8 bytes of hello world
😀💩 : is truncated to 8 bytes of 😀💩👍🏽
∮∮ : is truncated to 8 bytes of ∮∮∮ ∑∫∫ √−1
السل : is truncated to 8 bytes of السلام عليكم
नम : is truncated to 8 bytes of नमस्ते
A̷͎̝ : is truncated to 8 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱 : is truncated to 8 bytes of 𠜎𠜱𠝹𠱓
A‍️ : is truncated to 8 bytes of A‍️‍♂️️
​‍ : is truncated to 8 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 8 bytes of А̣̣
😀� : is truncated to 8 bytes of 😀�🏽
--------
hello wor : is truncated to 9 bytes of hello world
😀💩 : is truncated to 9 bytes of 😀💩👍🏽
∮∮∮ : is truncated to 9 bytes of ∮∮∮ ∑∫∫ √−1
السل : is truncated to 9 bytes of السلام عليكم
नमस : is truncated to 9 bytes of नमस्ते
A̷͎̝͕ : is truncated to 9 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱 : is truncated to 9 bytes of 𠜎𠜱𠝹𠱓
A‍️ : is truncated to 9 bytes of A‍️‍♂️️
​‍️ : is truncated to 9 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 9 bytes of А̣̣
😀� : is truncated to 9 bytes of 😀�🏽
--------
hello worl : is truncated to 10 bytes of hello world
😀💩 : is truncated to 10 bytes of 😀💩👍🏽
∮∮∮ : is truncated to 10 bytes of ∮∮∮ ∑∫∫ √−1
السلا : is truncated to 10 bytes of السلام عليكم
नमस : is truncated to 10 bytes of नमस्ते
A̷͎̝͕ : is truncated to 10 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱 : is truncated to 10 bytes of 𠜎𠜱𠝹𠱓
A‍️‍ : is truncated to 10 bytes of A‍️‍♂️️
​‍️ : is truncated to 10 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 10 bytes of А̣̣
😀� : is truncated to 10 bytes of 😀�🏽
--------
hello world : is truncated to 11 bytes of hello world
😀💩 : is truncated to 11 bytes of 😀💩👍🏽
∮∮∮ : is truncated to 11 bytes of ∮∮∮ ∑∫∫ √−1
السلا : is truncated to 11 bytes of السلام عليكم
नमस : is truncated to 11 bytes of नमस्ते
A̷͎̝͕͑ : is truncated to 11 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱 : is truncated to 11 bytes of 𠜎𠜱𠝹𠱓
A‍️‍ : is truncated to 11 bytes of A‍️‍♂️️
​‍️ : is truncated to 11 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 11 bytes of А̣̣
😀�🏽 : is truncated to 11 bytes of 😀�🏽
--------
hello world : is truncated to 12 bytes of hello world
😀💩👍 : is truncated to 12 bytes of 😀💩👍🏽
∮∮∮ : is truncated to 12 bytes of ∮∮∮ ∑∫∫ √−1
السلام : is truncated to 12 bytes of السلام عليكم
नमस् : is truncated to 12 bytes of नमस्ते
A̷͎̝͕͑ : is truncated to 12 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹 : is truncated to 12 bytes of 𠜎𠜱𠝹𠱓
A‍️‍ : is truncated to 12 bytes of A‍️‍♂️️
​‍️​ : is truncated to 12 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 12 bytes of А̣̣
😀�🏽 : is truncated to 12 bytes of 😀�🏽
--------
hello world : is truncated to 13 bytes of hello world
😀💩👍 : is truncated to 13 bytes of 😀💩👍🏽
∮∮∮ ∑ : is truncated to 13 bytes of ∮∮∮ ∑∫∫ √−1
السلام : is truncated to 13 bytes of السلام عليكم
नमस् : is truncated to 13 bytes of नमस्ते
A̷͎̝͕͑͛ : is truncated to 13 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹 : is truncated to 13 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂ : is truncated to 13 bytes of A‍️‍♂️️
​‍️​ : is truncated to 13 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 13 bytes of А̣̣
😀�🏽 : is truncated to 13 bytes of 😀�🏽
--------
hello world : is truncated to 14 bytes of hello world
😀💩👍 : is truncated to 14 bytes of 😀💩👍🏽
∮∮∮ ∑ : is truncated to 14 bytes of ∮∮∮ ∑∫∫ √−1
السلام : is truncated to 14 bytes of السلام عليكم
नमस् : is truncated to 14 bytes of नमस्ते
A̷͎̝͕͑͛ : is truncated to 14 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹 : is truncated to 14 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂ : is truncated to 14 bytes of A‍️‍♂️️
​‍️​ : is truncated to 14 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 14 bytes of А̣̣
😀�🏽 : is truncated to 14 bytes of 😀�🏽
--------
hello world : is truncated to 15 bytes of hello world
😀💩👍 : is truncated to 15 bytes of 😀💩👍🏽
∮∮∮ ∑ : is truncated to 15 bytes of ∮∮∮ ∑∫∫ √−1
السلام ع : is truncated to 15 bytes of السلام عليكم
नमस्त : is truncated to 15 bytes of नमस्ते
A̷͎̝͕͑͛̓ : is truncated to 15 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹 : is truncated to 15 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂ : is truncated to 15 bytes of A‍️‍♂️️
​‍️​‍ : is truncated to 15 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 15 bytes of А̣̣
😀�🏽 : is truncated to 15 bytes of 😀�🏽
--------
hello world : is truncated to 16 bytes of hello world
😀💩👍🏽 : is truncated to 16 bytes of 😀💩👍🏽
∮∮∮ ∑∫ : is truncated to 16 bytes of ∮∮∮ ∑∫∫ √−1
السلام ع : is truncated to 16 bytes of السلام عليكم
नमस्त : is truncated to 16 bytes of नमस्ते
A̷͎̝͕͑͛̓ : is truncated to 16 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹𠱓 : is truncated to 16 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂️ : is truncated to 16 bytes of A‍️‍♂️️
​‍️​‍ : is truncated to 16 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 16 bytes of А̣̣
😀�🏽 : is truncated to 16 bytes of 😀�🏽
--------
hello world : is truncated to 17 bytes of hello world
😀💩👍🏽 : is truncated to 17 bytes of 😀💩👍🏽
∮∮∮ ∑∫ : is truncated to 17 bytes of ∮∮∮ ∑∫∫ √−1
السلام عل : is truncated to 17 bytes of السلام عليكم
नमस्त : is truncated to 17 bytes of नमस्ते
A̷͎̝͕͑͛̓Ȃ : is truncated to 17 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹𠱓 : is truncated to 17 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂️ : is truncated to 17 bytes of A‍️‍♂️️
​‍️​‍ : is truncated to 17 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 17 bytes of А̣̣
😀�🏽 : is truncated to 17 bytes of 😀�🏽
--------
hello world : is truncated to 18 bytes of hello world
😀💩👍🏽 : is truncated to 18 bytes of 😀💩👍🏽
∮∮∮ ∑∫ : is truncated to 18 bytes of ∮∮∮ ∑∫∫ √−1
السلام عل : is truncated to 18 bytes of السلام عليكم
नमस्ते : is truncated to 18 bytes of नमस्ते
A̷͎̝͕͑͛̓Ȃ : is truncated to 18 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹𠱓 : is truncated to 18 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂️ : is truncated to 18 bytes of A‍️‍♂️️
​‍️​‍♀ : is truncated to 18 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 18 bytes of А̣̣
😀�🏽 : is truncated to 18 bytes of 😀�🏽
--------
hello world : is truncated to 19 bytes of hello world
😀💩👍🏽 : is truncated to 19 bytes of 😀💩👍🏽
∮∮∮ ∑∫∫ : is truncated to 19 bytes of ∮∮∮ ∑∫∫ √−1
السلام علي : is truncated to 19 bytes of السلام عليكم
नमस्ते : is truncated to 19 bytes of नमस्ते
A̷͎̝͕͑͛̓Ȃ̵ : is truncated to 19 bytes of A̷͎̝͕͑͛̓Ȃ̵͙͊̄͘ạ̸̫͖͔͂̔̕ā̶͖͖͐̈́̔
𠜎𠜱𠝹𠱓 : is truncated to 19 bytes of 𠜎𠜱𠝹𠱓
A‍️‍♂️️ : is truncated to 19 bytes of A‍️‍♂️️
​‍️​‍♀ : is truncated to 19 bytes of ​‍️​‍♀️​‍♂️​
А̣̣ : is truncated to 19 bytes of А̣̣
😀�🏽 : is truncated to 19 bytes of 😀�🏽
--------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment