Created
January 13, 2014 23:11
-
-
Save davidglezz/8409916 to your computer and use it in GitHub Desktop.
A utf16_to_utf8 function adapted from opusfile, with some modifications. I have not tested it myself, but I expect it to work well.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <string.h> | |
#include <wchar.h> | |
/*Converts a NUL-terminated wide string to a newly allocated UTF-8 string.
  Each wchar_t is normally treated as one UTF-16 code unit; a high surrogate
   immediately followed by a low surrogate is combined into one code point
   and written as a 4-byte sequence.
  Where wchar_t is wider than 16 bits, a unit in the range
   0x10000..0x10FFFF is taken to be a complete code point and is also written
   as a 4-byte sequence; any larger value cannot be encoded and is replaced
   by U+FFFD.
  src: The NUL-terminated wide string to convert (may be NULL).
  Return: A malloc()ed, NUL-terminated buffer that the caller must free(),
   or NULL if src is NULL, the required size overflows size_t, or the
   allocation fails.*/
static char *utf16_to_utf8(const wchar_t *src)
{
    size_t len, si, di;
    char *dst;

    if (src == NULL)
        return NULL;
    len = wcslen(src);
    /*Worst case is 4 output bytes per input unit (a wide wchar_t holding a
       supplementary code point), plus the terminator.
      Refuse lengths for which that size computation would wrap.*/
    if (len > ((size_t)-1 - 1) / 4)
        return NULL;
    dst = malloc(4 * len + 1);
    if (dst == NULL)
        return NULL;
    for (di = si = 0; si < len; si++) {
        /*Go through unsigned long so a signed or wide wchar_t cannot produce
           a negative or truncated value.*/
        unsigned long c0 = (unsigned long)src[si];
        unsigned long cp = c0;

        if (c0 < 0x80) {
            /*Can be represented by a 1-byte sequence.*/
            dst[di++] = (char)c0;
            continue;
        }
        if (c0 < 0x800) {
            /*Can be represented by a 2-byte sequence.*/
            dst[di++] = (char)(0xC0 | (c0 >> 6));
            dst[di++] = (char)(0x80 | (c0 & 0x3F));
            continue;
        }
        if (c0 >= 0xD800 && c0 < 0xDC00) {
            /*This read is in bounds: si < len, so src[si+1] is at worst the
               terminating NUL.*/
            unsigned long c1 = (unsigned long)src[si + 1];
            if (c1 >= 0xDC00 && c1 < 0xE000) {
                /*Surrogate pair: combine both units and consume the second.*/
                cp = (((c0 & 0x3FF) << 10) | (c1 & 0x3FF)) + 0x10000;
                si++;
            }
        } else if (c0 > 0x10FFFF) {
            /*Beyond the Unicode range; no UTF-8 form exists for it.*/
            cp = 0xFFFD;
        }
        if (cp >= 0x10000) {
            /*Can be represented by a 4-byte sequence.*/
            dst[di++] = (char)(0xF0 | (cp >> 18));
            dst[di++] = (char)(0x80 | ((cp >> 12) & 0x3F));
            dst[di++] = (char)(0x80 | ((cp >> 6) & 0x3F));
            dst[di++] = (char)(0x80 | (cp & 0x3F));
            continue;
        }
        /*Anything else is either a valid 3-byte sequence, an unpaired
           surrogate, or 'not a character'.
          In the latter two cases, we just encode the value as a 3-byte
           sequence anyway (producing technically invalid UTF-8), so that
           later error handling can detect the problem.*/
        dst[di++] = (char)(0xE0 | (cp >> 12));
        dst[di++] = (char)(0x80 | ((cp >> 6) & 0x3F));
        dst[di++] = (char)(0x80 | (cp & 0x3F));
    }
    dst[di] = '\0';
    return dst;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment