Created
November 8, 2015 03:35
-
-
Save mikeash/39dc06cd35f1947cc075 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Decodes a single UTF-8 encoded code point starting at `utf8`.
//
// The input is expected to be NUL-terminated. The sequence length is taken
// from the lead byte alone:
//   0xxxxxxx -> 1 byte,  110xxxxx -> 2 bytes,
//   1110xxxx -> 3 bytes, 1111xxxx -> 4 bytes.
//
// Parameters:
//   utf8         - pointer to the first byte of the sequence to decode.
//   outCodepoint - receives the decoded code point.
//
// Returns a pointer to the first byte after the consumed sequence.
//
// If a NUL byte appears where a continuation byte is expected (a truncated
// sequence), U+FFFD is stored and the returned pointer addresses that NUL, so
// a caller scanning for the terminator can never step over it. No other
// validation is performed (overlong forms, surrogates, stray continuation
// bytes and out-of-range values are decoded as-is).
static inline const uint8_t *DecodeOneUTF8(const uint8_t *utf8, uint32_t *outCodepoint) {
    const uint8_t lead = utf8[0];

    // Each flag is 0 or 1; a longer form implies every shorter one.
    const unsigned hasTwo = lead >> 7;
    const unsigned hasThree = hasTwo & ((lead >> 5) & 1u);
    const unsigned hasFour = hasThree & ((lead >> 4) & 1u);
    const unsigned trailingCount = hasTwo + hasThree + hasFour;

    // Payload bits carried by the lead byte: 7, 5, 4 or 3.
    const unsigned leadBits = 7u - hasTwo - hasTwo - hasThree - hasFour;
    uint32_t codepoint = lead & (0xFFu >> (8u - leadBits));

    // Only touch the bytes the lead byte announces, and never read beyond a
    // NUL: reading utf8[1..3] unconditionally would run past the terminator
    // of short strings. This trades the branch-free form for memory safety.
    for (unsigned i = 1; i <= trailingCount; i++) {
        const uint8_t next = utf8[i];
        if (next == '\0') {
            *outCodepoint = 0xFFFDu; // U+FFFD REPLACEMENT CHARACTER
            return utf8 + i;
        }
        codepoint = (codepoint << 6) | (next & 0x3Fu);
    }

    *outCodepoint = codepoint;
    return utf8 + 1 + trailingCount;
}
// Decodes the NUL-terminated UTF-8 string `utf8` into `destination`, storing
// one uint32_t per DecodeOneUTF8 call, in order.
//
// Parameters:
//   utf8        - input bytes; decoding stops when the current byte is NUL.
//   destination - output array. No bounds check is made and no terminator is
//                 written; each step consumes at least one input byte, so the
//                 caller needs room for at most one element per input byte.
//
// Returns the number of elements written to `destination`.
size_t DecodeUTF8(const uint8_t *utf8, uint32_t *destination) {
    size_t count = 0;
    for (const uint8_t *cursor = utf8; *cursor != '\0'; count++) {
        cursor = DecodeOneUTF8(cursor, destination + count);
    }
    return count;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment