Last active
November 18, 2024 13:24
-
-
Save MightyPork/52eda3e5677b4b03524e40c9f0ab1da5 to your computer and use it in GitHub Desktop.
C function to encode a Unicode code point as UTF-8 byte array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
/**
 * Encode a Unicode code point as a 0-terminated UTF-8 byte sequence.
 *
 * Only Unicode scalar values are encodable: the surrogate range
 * U+D800..U+DFFF and anything above U+10FFFF are rejected, because
 * UTF-8 has no well-formed representation for them. On rejection the
 * replacement character U+FFFD is written instead, so the buffer
 * always holds a valid, terminated string.
 *
 * @author Ondřej Hruška <[email protected]>
 * @license MIT
 *
 * @param out - output buffer (min 5 bytes), will be 0-terminated
 * @param utf - code point 0-0x10FFFF, excluding surrogates 0xD800-0xDFFF
 * @return number of bytes written, not counting the terminator, on success;
 *         0 on failure (also produces U+FFFD, which uses 3 bytes)
 */
int utf8_encode(char *out, uint32_t utf)
{
    // Surrogates are reserved for UTF-16 and must never appear in UTF-8;
    // without this check they would fall into the 3-byte branch below.
    const int is_surrogate = (utf >= 0xD800 && utf <= 0xDFFF);

    if (is_surrogate || utf > 0x10FFFF) {
        // error - use replacement character U+FFFD (EF BF BD)
        out[0] = (char) 0xEF;
        out[1] = (char) 0xBF;
        out[2] = (char) 0xBD;
        out[3] = 0;
        return 0;
    }

    if (utf <= 0x7F) {
        // Plain ASCII: 0xxxxxxx
        out[0] = (char) utf;
        out[1] = 0;
        return 1;
    }

    if (utf <= 0x07FF) {
        // 2-byte sequence: 110xxxxx 10xxxxxx
        out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
        out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
        out[2] = 0;
        return 2;
    }

    if (utf <= 0xFFFF) {
        // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
        out[1] = (char) (((utf >> 6) & 0x3F) | 0x80);
        out[2] = (char) (((utf >> 0) & 0x3F) | 0x80);
        out[3] = 0;
        return 3;
    }

    // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
    out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
    out[2] = (char) (((utf >> 6) & 0x3F) | 0x80);
    out[3] = (char) (((utf >> 0) & 0x3F) | 0x80);
    out[4] = 0;
    return 4;
}
Please stop posting to this gist, it is NOT related to the error you're getting and I don't have the time or the will to troubleshoot your program. StackOverflow is the place to get help.
Eugenetucha are you assuming this snippet is a standalone program? You need to provide your own main() or WinMain() that uses it, or compile it as library or linkable object.
How to convert a double to UTF-8 bytes?
@wangjunfeng90214 what do you mean by double?
@MightyPork just wanted to say thanks, due to this I don't have to rely on the ICU library! <3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
are you using linux?