Skip to content

Instantly share code, notes, and snippets.

@mikeash
Created November 8, 2015 03:35
Show Gist options
  • Save mikeash/39dc06cd35f1947cc075 to your computer and use it in GitHub Desktop.
Save mikeash/39dc06cd35f1947cc075 to your computer and use it in GitHub Desktop.
/*
 * Decodes one UTF-8 sequence starting at `utf8` without any branches.
 *
 * The sequence length is derived from the lead byte alone: bit 7 set means at
 * least two bytes, bits 7 and 5 set means at least three, and bits 7, 5 and 4
 * set means four. No validation is performed: bit 6 of the lead byte and the
 * top two bits of every trailing byte are ignored, so the caller must pass
 * well-formed UTF-8.
 *
 * Only bytes that belong to the sequence (as announced by the lead byte) are
 * read, so decoding the final character of a buffer never touches memory
 * beyond that character.
 *
 * utf8:         first byte of the sequence to decode.
 * outCodepoint: receives the decoded code point.
 *
 * Returns a pointer to the byte immediately after the decoded sequence.
 */
static inline const uint8_t *DecodeOneUTF8(const uint8_t *utf8, uint32_t *outCodepoint) {
    const uint8_t lead = utf8[0];

    /* Each flag is 0 or 1; a longer form implies all the shorter ones. */
    const uint8_t hasTwo = lead >> 7;
    const uint8_t hasThree = hasTwo & ((lead >> 5) & 1);
    const uint8_t hasFour = hasThree & ((lead >> 4) & 1);

    /*
     * Offsets of the trailing bytes. When a trailing byte is absent its
     * offset collapses onto the previous in-sequence byte, so the load stays
     * inside the sequence; the loaded value is multiplied by a zero flag
     * below and therefore never reaches the result.
     */
    const unsigned int offsetTwo = hasTwo;
    const unsigned int offsetThree = offsetTwo + hasThree;
    const unsigned int offsetFour = offsetThree + hasFour;
    const uint8_t byteTwo = utf8[offsetTwo];
    const uint8_t byteThree = utf8[offsetThree];
    const uint8_t byteFour = utf8[offsetFour];

    /* Payload bits carried by the lead byte: 7, 5, 4 or 3. */
    const uint8_t leadBits = 7 - hasTwo - hasTwo - hasThree - hasFour;
    const uint8_t leadMask = 0xff >> (8 - leadBits);
    const uint8_t trailingMask = 0x3f;

    /* Shift in six payload bits for every trailing byte that is present. */
    uint32_t codepoint = lead & leadMask;
    codepoint <<= hasTwo * 6;
    codepoint |= hasTwo * (byteTwo & trailingMask);
    codepoint <<= hasThree * 6;
    codepoint |= hasThree * (byteThree & trailingMask);
    codepoint <<= hasFour * 6;
    codepoint |= hasFour * (byteFour & trailingMask);

    *outCodepoint = codepoint;
    return utf8 + 1 + offsetFour;
}
/*
 * Decodes the NUL-terminated UTF-8 string `utf8` into `destination`, writing
 * one uint32_t code point per decoded sequence.
 *
 * No bound is applied to `destination` and no terminator is written after the
 * last code point; the caller must supply a buffer large enough for every
 * code point in the input.
 *
 * Returns the number of code points written.
 */
size_t DecodeUTF8(const uint8_t *utf8, uint32_t *destination) {
    size_t count = 0;
    for (const uint8_t *cursor = utf8; *cursor != '\0'; count++) {
        /* Each call consumes one sequence and yields the next start. */
        cursor = DecodeOneUTF8(cursor, &destination[count]);
    }
    return count;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment