Skip to content

Instantly share code, notes, and snippets.

@assyrianic
Last active August 20, 2023 09:02
Show Gist options
  • Save assyrianic/c17af7cbe506ddf6fbd23c0992ffd74d to your computer and use it in GitHub Desktop.
Save assyrianic/c17af7cbe506ddf6fbd23c0992ffd74d to your computer and use it in GitHub Desktop.
#if defined _utf8_included
#endinput
#endif
#define _utf8_included
#include <sourcemod>
/**
 * Returns the sequence length implied by the first byte of a UTF-8 sequence.
 *
 * The result is the number of consecutive 1 bits at the top of the byte,
 * except that a byte with no leading 1 bit (plain ASCII) reports 1.
 * A lone continuation byte (10xxxxxx) therefore also reports 1, and bytes
 * with five or more leading 1 bits (never valid in UTF-8) report 5..8.
 *
 * @param c     First byte of the sequence; only the low 8 bits are examined.
 * @return      A length in the range 1..8. Values above 4 denote an invalid
 *              lead byte.
 */
stock int GetUTF8Len(int c) {
	int ones = 0;
	// Walk from bit 7 down to bit 0 and stop at the first clear bit.
	// The scan is bounded to the byte so the shift count never goes negative.
	for( int bit = 7; bit >= 0; bit-- ) {
		if( !(c & (1 << bit)) ) {
			break;
		}
		ones++;
	}
	return ones == 0 ? 1 : ones;
}
/**
 * Returns how many bytes are needed to encode a code point as UTF-8.
 *
 * @param rune  Code point to measure.
 * @return      1..4 for values in 0..0x10FFFF, or 0 when the value cannot
 *              be encoded (negative or above 0x10FFFF).
 */
stock int GetRuneBytes(int rune) {
	// Reject out-of-range values first so a negative value is not
	// mistaken for a one-byte ASCII character.
	if( rune < 0 || rune >= 0x110000 ) {
		return 0;
	}
	if( rune < 0x80 ) {
		return 1;
	}
	if( rune < 0x800 ) {
		return 2;
	}
	if( rune < 0x10000 ) {
		return 3;
	}
	return 4;
}
/**
 * Decodes the UTF-8 sequence at the start of `cstr` into a code point.
 *
 * @param cstr  Bytes to decode, starting at the lead byte.
 * @param read  Receives the sequence length reported by GetUTF8Len for the
 *              lead byte. It is set even when decoding fails.
 * @return      The decoded code point. When the reported length is 1 or
 *              less, the first byte is returned unchanged. Returns 0 when
 *              the lead byte announces more than 4 bytes, a continuation
 *              byte is missing or malformed, or the sequence is not the
 *              shortest encoding of an encodable code point.
 */
stock int UTF8CharToRune(const char[] cstr, int &read=0) {
	read = GetUTF8Len(cstr[0]);
	if( read <= 1 ) {
		return cstr[0];
	}
	if( read > 4 ) {
		return 0;
	}
	// The lead byte carries (7 - read) payload bits: 0x1F, 0x0F, 0x07.
	int rune = cstr[0] & (0xFF >> (read + 1));
	for( int i=1; i < read; i++ ) {
		// Every trailing byte must look like 10xxxxxx. A NUL terminator
		// fails this test, so decoding never runs past the string end.
		if( (cstr[i] & 0xC0) != 0x80 ) {
			return 0;
		}
		rune = (rune << 6) | (cstr[i] & 0x3F);
	}
	// Accept only what the encoder would emit with this same length.
	// This rejects overlong forms and values above 0x10FFFF.
	if( GetRuneBytes(rune) != read ) {
		return 0;
	}
	return rune;
}
/**
 * Counts the UTF-8 sequences in a NUL-terminated string.
 *
 * Each sequence is measured from its lead byte with GetUTF8Len. The bytes
 * after the lead byte are not validated.
 *
 * @param str   NUL-terminated UTF-8 string.
 * @return      Number of sequences plus one. The extra one matches the
 *              terminating 0 that UTF8StrToRunes appends to a rune array.
 */
stock int GetUTF8StrLen(const char[] str) {
	int runes = 0;
	int i = 0;
	while( str[i] != 0 ) {
		int len = GetUTF8Len(str[i]);
		if( len < 1 ) {
			// Always make forward progress.
			len = 1;
		}
		// Advance over the sequence, but never step across the terminator
		// when the string ends in the middle of a multi-byte sequence.
		int step = 1;
		while( step < len && str[i + step] != 0 ) {
			step++;
		}
		i += step;
		runes++;
	}
	return runes + 1;
}
/**
 * Decodes a NUL-terminated UTF-8 string into a 0-terminated rune array.
 *
 * Decoding stops at the end of the string or at the first sequence for
 * which UTF8CharToRune returns 0. No bounds checking is done on `runes`.
 *
 * @param str    NUL-terminated UTF-8 string to decode.
 * @param runes  Output array; receives the decoded runes followed by a 0.
 */
stock void UTF8StrToRunes(const char[] str, int[] runes) {
	int pos = 0;
	int count = 0;
	// `width` is filled in by UTF8CharToRune on every pass and is used as
	// the step to the next sequence.
	for( int width = 0; str[pos] != 0; pos += width ) {
		int rune = UTF8CharToRune(str[pos], width);
		if( rune == 0 ) {
			break;
		}
		runes[count] = rune;
		count++;
	}
	runes[count] = 0;
}
/**
 * Computes the buffer size needed to encode a rune array as UTF-8.
 *
 * @param runes  0-terminated array of code points.
 * @return       Sum of GetRuneBytes over every rune before the terminating
 *               0, plus one for the string terminator.
 */
stock int GetRuneStrLen(const int[] runes) {
	// Start at 1 to account for the trailing NUL byte.
	int total = 1;
	int idx = 0;
	while( runes[idx] != 0 ) {
		total += GetRuneBytes(runes[idx]);
		idx++;
	}
	return total;
}
/**
 * Encodes a 0-terminated rune array as a NUL-terminated UTF-8 string.
 *
 * No bounds checking is done on `buf`. GetRuneStrLen(runes) gives the
 * number of bytes written, including the terminator, when every rune is
 * encodable.
 *
 * @param runes  0-terminated array of code points.
 * @param buf    Output buffer. It is always NUL-terminated on return; on
 *               failure it holds the runes encoded before the bad one.
 * @return       True when at least one byte was encoded. False when the
 *               input is empty or a rune is rejected by GetRuneBytes.
 */
stock bool RunesToUTF8Str(const int[] runes, char[] buf) {
	int n = 0;
	for( int i = 0; runes[i] != 0; i++ ) {
		int rune = runes[i];
		int bytes = GetRuneBytes(rune);
		switch( bytes ) {
			case 1: {
				buf[n] = rune;
			}
			case 2: {
				buf[ n ] = (0xC0 | (rune >> 6));
				buf[n+1] = (0x80 | (rune & 0x3F));
			}
			case 3: {
				buf[ n ] = (0xE0 | (rune >> 12));
				buf[n+1] = (0x80 | ((rune >> 6) & 0x3F));
				buf[n+2] = (0x80 | (rune & 0x3F));
			}
			case 4: {
				buf[ n ] = (0xF0 | (rune >> 18));
				buf[n+1] = (0x80 | ((rune >> 12) & 0x3F));
				buf[n+2] = (0x80 | ((rune >> 6) & 0x3F));
				buf[n+3] = (0x80 | (rune & 0x3F));
			}
			default: {
				// Terminate what has been written so the caller is never
				// left holding an unterminated string.
				buf[n] = 0;
				return false;
			}
		}
		n += bytes;
	}
	buf[n] = 0;
	return n > 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment