Created
November 23, 2013 15:22
-
-
Save jaytaph/7615753 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** gcc -g -O0 -fno-inline -o test test.c `pkg-config --libs --cflags icu-uc icu-io` */ | |
// Tell ICU to treat the default charset as UTF-8.
#define U_CHARSET_IS_UTF8 1 | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include "unicode/uchar.h" | |
#include "unicode/ucnv.h" | |
#include "unicode/utypes.h" | |
#include "unicode/utf8.h" | |
#include "unicode/ustdio.h" | |
#include "unicode/unorm.h" | |
char *test = "I - Björk Guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - \xd1\x82\xe0\xb8\x81\xe0\xb8\xa7"; | |
UConverter *converter; | |
UFILE *out; | |
/** | |
* | |
*/ | |
void utf8_convert_from_char(char *src, UChar **dst, uint32_t *dst_len) { | |
UErrorCode status = U_ZERO_ERROR; | |
if (converter == NULL) { | |
printf("Default Converter name: %s\n", ucnv_getDefaultName()); | |
converter = ucnv_open("UTF-8", &status); // Use default converter | |
if (U_FAILURE(status)) { | |
printf("Cannot create converter: %d\n", status); | |
exit(1); | |
} | |
} | |
*dst = (UChar *)malloc((strlen(src)+1) * sizeof(UChar)); | |
*dst_len = ucnv_toUChars(converter, *dst, strlen(src)+1, src, strlen(src), &status); | |
} | |
/** | |
* | |
*/ | |
int utf8_tolower(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len, const char *locale) { | |
UErrorCode status = U_ZERO_ERROR; | |
*dst = (UChar *)malloc((src_len+1) * sizeof(UChar)); | |
*dst_len= u_strToLower(*dst, src_len+1, src, src_len, locale, &status); | |
return 0; | |
} | |
/** | |
* | |
*/ | |
int utf8_toupper(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len, const char *locale) { | |
UErrorCode status = U_ZERO_ERROR; | |
*dst = (UChar *)malloc((src_len+1) * sizeof(UChar)); | |
*dst_len= u_strToUpper(*dst, src_len+1, src, src_len, locale, &status); | |
return 0; | |
} | |
/** | |
* | |
*/ | |
int utf8_reverse(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len) { | |
*dst = (UChar *)malloc((src_len+1) * sizeof(UChar)); | |
*dst_len = src_len; | |
int i; | |
for (i=0; i!=src_len; i++) { | |
(*dst)[i] = src[src_len - i - 1]; | |
} | |
return 0; | |
} | |
/** | |
* | |
*/ | |
int32_t utf8_strcasecmp(UChar *src, uint32_t src_len, UChar *dst, uint32_t dst_len) { | |
UErrorCode status = U_ZERO_ERROR; | |
int32_t i = u_strCaseCompare(src, src_len, dst, dst_len, 0, &status); | |
return i; | |
} | |
/** | |
* | |
*/ | |
int32_t utf8_strcmp(UChar *src, uint32_t src_len, UChar *dst, uint32_t dst_len) { | |
UErrorCode status = U_ZERO_ERROR; | |
return u_strCompare(src, src_len, dst, dst_len, 0, &status); | |
} | |
/** | |
* | |
*/ | |
void main(int argc, char *argv[]) { | |
out = u_finit(stdout, NULL, NULL); | |
UChar *dst; | |
int32_t dst_len; | |
utf8_convert_from_char(test, &dst, &dst_len); | |
u_fprintf(out, "(%d) %S\n", dst_len, dst); | |
UChar *dst2; | |
int32_t dst2_len; | |
utf8_tolower(dst, dst_len, &dst2, &dst2_len, NULL); | |
u_fprintf(out, "(%d) %S\n", dst2_len, dst2); | |
UChar *dst3; | |
int32_t dst3_len; | |
utf8_tolower(dst, dst_len, &dst3, &dst3_len, "tr_TR"); | |
u_fprintf(out, "(%d) %S\n", dst3_len, dst3); | |
UChar *dst7; | |
int32_t dst7_len; | |
utf8_toupper(dst, dst_len, &dst7, &dst7_len, NULL); | |
u_fprintf(out, "(%d) %S\n", dst7_len, dst7); | |
UChar *dst4; | |
int32_t dst4_len; | |
utf8_reverse(dst, dst_len, &dst4, &dst4_len); | |
u_fprintf(out, "(%d) %S\n", dst4_len, dst4); | |
UChar *dst5, *dst6; | |
int32_t dst5_len, dst6_len; | |
utf8_convert_from_char("Björk Guðmundsdóttir", &dst5, &dst5_len); | |
utf8_convert_from_char("BJÖRK GUÐMUNDSDÓTTIR", &dst6, &dst6_len); | |
u_fprintf(out, "(%d) %S\n", dst5_len, dst5); | |
u_fprintf(out, "(%d) %S\n", dst6_len, dst6); | |
int32_t i; | |
i = utf8_strcasecmp(dst6, dst6_len, dst5, dst5_len); | |
printf("I1: %d\n", i); | |
i = utf8_strcasecmp(dst5, dst5_len, dst6, dst6_len); | |
printf("I2: %d\n", i); | |
i = utf8_strcmp(dst6, dst6_len, dst5, dst5_len); | |
printf("I3: %d\n", i); | |
i = utf8_strcmp(dst5, dst5_len, dst6, dst6_len); | |
printf("I4: %d\n", i); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
Default Converter name: UTF-8
(62) I - Björk Guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) i - björk guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) ı - björk guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) I - BJÖRK GUÐMUNDSDÓTTIR - ABCÇDEFGĞHIIJKLMNOÖPRSŞTUÜVYZ - Тกว
(62) วกт - zyvüutşsrpöonmlkjiıhğgfedçcba - rittódsdnumðuG kröjB - I
(20) Björk Guðmundsdóttir
(20) BJÖRK GUÐMUNDSDÓTTIR
I1: 0
I2: 0
I3: -32
I4: 32