Skip to content

Instantly share code, notes, and snippets.

@jaytaph
Created November 23, 2013 15:22
Show Gist options
  • Save jaytaph/7615753 to your computer and use it in GitHub Desktop.
Save jaytaph/7615753 to your computer and use it in GitHub Desktop.
/** gcc -g -O0 -fno-inline -o test test.c `pkg-config --libs --cflags icu-uc icu-io` */
// We will be using UTF8
#define U_CHARSET_IS_UTF8 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "unicode/uchar.h"
#include "unicode/ucnv.h"
#include "unicode/unorm.h"
#include "unicode/ustdio.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "unicode/utf8.h"
#include "unicode/utypes.h"
char *test = "I - Björk Guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - \xd1\x82\xe0\xb8\x81\xe0\xb8\xa7";
UConverter *converter;
UFILE *out;
/**
 * Convert a NUL-terminated UTF-8 byte string into a newly allocated,
 * NUL-terminated UChar (UTF-16) string.
 *
 * The file-scope `converter` is opened lazily (as an explicit "UTF-8"
 * converter) on the first call and reused afterwards.
 *
 * @param src      NUL-terminated UTF-8 input.
 * @param dst      Receives a malloc()ed buffer; the caller must free() it.
 * @param dst_len  Receives the number of UChars written, excluding the
 *                 terminating NUL.
 *
 * The function returns void, so every failure (converter creation,
 * allocation, conversion) is reported and ends the process with exit(1),
 * matching the existing handling of a failed ucnv_open().
 */
void utf8_convert_from_char(char *src, UChar **dst, uint32_t *dst_len) {
    UErrorCode status = U_ZERO_ERROR;

    if (converter == NULL) {
        printf("Default Converter name: %s\n", ucnv_getDefaultName());
        converter = ucnv_open("UTF-8", &status);
        if (U_FAILURE(status)) {
            printf("Cannot create converter: %d\n", status);
            exit(1);
        }
    }

    /* ICU takes int32_t lengths; reject input that would not fit. */
    size_t src_len = strlen(src);
    if (src_len >= (size_t)INT32_MAX) {
        fprintf(stderr, "Cannot convert string: input too long\n");
        exit(1);
    }

    /*
     * Each UTF-8 byte yields at most one UTF-16 code unit, so src_len + 1
     * UChars always leaves room for the result and its terminator.
     */
    int32_t capacity = (int32_t)src_len + 1;
    UChar *buf = malloc((size_t)capacity * sizeof *buf);
    if (buf == NULL) {
        fprintf(stderr, "Cannot convert string: out of memory\n");
        exit(1);
    }

    int32_t written = ucnv_toUChars(converter, buf, capacity,
                                    src, (int32_t)src_len, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "Cannot convert string: %s\n", u_errorName(status));
        free(buf);
        exit(1);
    }

    *dst = buf;
    *dst_len = (uint32_t)written;
}
/**
 * Lowercase a UChar (UTF-16) string into a newly allocated, NUL-terminated
 * buffer using u_strToLower().
 *
 * Case mapping can change the length of a string, so the required size is
 * obtained with a preflight call (NULL destination, zero capacity) before
 * the buffer is allocated.
 *
 * @param src      Input string.
 * @param src_len  Number of UChars in src, passed through to ICU.
 * @param dst      Receives a malloc()ed buffer the caller must free();
 *                 set to NULL on failure.
 * @param dst_len  Receives the result length excluding the terminator;
 *                 set to 0 on failure.
 * @param locale   Locale passed to u_strToLower() (the demo passes NULL
 *                 and "tr_TR").
 * @return 0 on success, -1 on allocation or ICU failure.
 */
int utf8_tolower(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len, const char *locale) {
    *dst = NULL;
    *dst_len = 0;

    /* Preflight: an overflow status here just reports the needed length. */
    UErrorCode status = U_ZERO_ERROR;
    int32_t needed = u_strToLower(NULL, 0, src, src_len, locale, &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
        return -1;
    }
    if (needed < 0 || needed == INT32_MAX) {
        return -1;
    }

    /* One extra UChar so the result is always NUL-terminated. */
    int32_t capacity = needed + 1;
    UChar *buf = malloc((size_t)capacity * sizeof *buf);
    if (buf == NULL) {
        return -1;
    }

    status = U_ZERO_ERROR;
    int32_t written = u_strToLower(buf, capacity, src, src_len, locale, &status);
    if (U_FAILURE(status)) {
        free(buf);
        return -1;
    }

    *dst = buf;
    *dst_len = written;
    return 0;
}
/**
 * Uppercase a UChar (UTF-16) string into a newly allocated, NUL-terminated
 * buffer using u_strToUpper().
 *
 * Uppercasing can lengthen a string (one character may map to several),
 * so the required size is obtained with a preflight call (NULL
 * destination, zero capacity) before the buffer is allocated.
 *
 * @param src      Input string.
 * @param src_len  Number of UChars in src, passed through to ICU.
 * @param dst      Receives a malloc()ed buffer the caller must free();
 *                 set to NULL on failure.
 * @param dst_len  Receives the result length excluding the terminator;
 *                 set to 0 on failure.
 * @param locale   Locale passed to u_strToUpper() (the demo passes NULL).
 * @return 0 on success, -1 on allocation or ICU failure.
 */
int utf8_toupper(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len, const char *locale) {
    *dst = NULL;
    *dst_len = 0;

    /* Preflight: an overflow status here just reports the needed length. */
    UErrorCode status = U_ZERO_ERROR;
    int32_t needed = u_strToUpper(NULL, 0, src, src_len, locale, &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
        return -1;
    }
    if (needed < 0 || needed == INT32_MAX) {
        return -1;
    }

    /* One extra UChar so the result is always NUL-terminated. */
    int32_t capacity = needed + 1;
    UChar *buf = malloc((size_t)capacity * sizeof *buf);
    if (buf == NULL) {
        return -1;
    }

    status = U_ZERO_ERROR;
    int32_t written = u_strToUpper(buf, capacity, src, src_len, locale, &status);
    if (U_FAILURE(status)) {
        free(buf);
        return -1;
    }

    *dst = buf;
    *dst_len = written;
    return 0;
}
/**
 * Reverse a UChar (UTF-16) string into a newly allocated, NUL-terminated
 * buffer.
 *
 * Reversal is done per code point: a lead/trail surrogate pair is copied
 * in its original order so the result stays valid UTF-16. Combining
 * sequences are NOT kept together; a base character and its combining
 * marks end up in reversed order.
 *
 * @param src      Input string.
 * @param src_len  Number of UChars in src; must be >= 0.
 * @param dst      Receives a malloc()ed buffer the caller must free();
 *                 set to NULL on failure.
 * @param dst_len  Receives the result length (always src_len on success);
 *                 set to 0 on failure.
 * @return 0 on success, -1 on invalid arguments or allocation failure.
 */
int utf8_reverse(UChar *src, int32_t src_len, UChar **dst, int32_t *dst_len) {
    *dst = NULL;
    *dst_len = 0;

    if (src_len < 0 || (src == NULL && src_len > 0)) {
        return -1;
    }

    UChar *buf = malloc(((size_t)src_len + 1) * sizeof *buf);
    if (buf == NULL) {
        return -1;
    }

    int32_t in = src_len;   /* one past the next unit to consume */
    int32_t pos = 0;        /* next write position in buf */
    while (in > 0) {
        if (in >= 2 && U16_IS_TRAIL(src[in - 1]) && U16_IS_LEAD(src[in - 2])) {
            /* Keep the surrogate pair in lead-then-trail order. */
            buf[pos++] = src[in - 2];
            buf[pos++] = src[in - 1];
            in -= 2;
        } else {
            buf[pos++] = src[in - 1];
            in -= 1;
        }
    }
    /* Callers print the result with %S, which needs a terminator. */
    buf[pos] = 0;

    *dst = buf;
    *dst_len = pos;
    return 0;
}
/**
 * Case-insensitive comparison of two UChar strings.
 *
 * Thin wrapper around u_strCaseCompare() called with options 0. The ICU
 * error code is not reported to the caller.
 *
 * @param src      First string.
 * @param src_len  Length of src in UChars.
 * @param dst      Second string.
 * @param dst_len  Length of dst in UChars.
 * @return The value returned by u_strCaseCompare().
 */
int32_t utf8_strcasecmp(UChar *src, uint32_t src_len, UChar *dst, uint32_t dst_len) {
    UErrorCode err = U_ZERO_ERROR;

    return u_strCaseCompare(src, src_len, dst, dst_len, 0, &err);
}
/**
 * Case-sensitive comparison of two UChar strings.
 *
 * Thin wrapper around u_strCompare() with its codePointOrder argument
 * set to 0.
 *
 * @param src      First string.
 * @param src_len  Length of src in UChars.
 * @param dst      Second string.
 * @param dst_len  Length of dst in UChars.
 * @return The value returned by u_strCompare().
 */
int32_t utf8_strcmp(UChar *src, uint32_t src_len, UChar *dst, uint32_t dst_len) {
    /* Declared but never handed to u_strCompare(), as in the original. */
    UErrorCode err = U_ZERO_ERROR;
    int32_t order = u_strCompare(src, src_len, dst, dst_len, 0, &err);

    return order;
}
/**
*
*/
void main(int argc, char *argv[]) {
out = u_finit(stdout, NULL, NULL);
UChar *dst;
int32_t dst_len;
utf8_convert_from_char(test, &dst, &dst_len);
u_fprintf(out, "(%d) %S\n", dst_len, dst);
UChar *dst2;
int32_t dst2_len;
utf8_tolower(dst, dst_len, &dst2, &dst2_len, NULL);
u_fprintf(out, "(%d) %S\n", dst2_len, dst2);
UChar *dst3;
int32_t dst3_len;
utf8_tolower(dst, dst_len, &dst3, &dst3_len, "tr_TR");
u_fprintf(out, "(%d) %S\n", dst3_len, dst3);
UChar *dst7;
int32_t dst7_len;
utf8_toupper(dst, dst_len, &dst7, &dst7_len, NULL);
u_fprintf(out, "(%d) %S\n", dst7_len, dst7);
UChar *dst4;
int32_t dst4_len;
utf8_reverse(dst, dst_len, &dst4, &dst4_len);
u_fprintf(out, "(%d) %S\n", dst4_len, dst4);
UChar *dst5, *dst6;
int32_t dst5_len, dst6_len;
utf8_convert_from_char("Björk Guðmundsdóttir", &dst5, &dst5_len);
utf8_convert_from_char("BJÖRK GUÐMUNDSDÓTTIR", &dst6, &dst6_len);
u_fprintf(out, "(%d) %S\n", dst5_len, dst5);
u_fprintf(out, "(%d) %S\n", dst6_len, dst6);
int32_t i;
i = utf8_strcasecmp(dst6, dst6_len, dst5, dst5_len);
printf("I1: %d\n", i);
i = utf8_strcasecmp(dst5, dst5_len, dst6, dst6_len);
printf("I2: %d\n", i);
i = utf8_strcmp(dst6, dst6_len, dst5, dst5_len);
printf("I3: %d\n", i);
i = utf8_strcmp(dst5, dst5_len, dst6, dst6_len);
printf("I4: %d\n", i);
}
@jaytaph
Copy link
Author

jaytaph commented Nov 23, 2013

Output:

Default Converter name: UTF-8
(62) I - Björk Guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) i - björk guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) ı - björk guðmundsdóttir - abcçdefgğhıijklmnoöprsştuüvyz - тกว
(62) I - BJÖRK GUÐMUNDSDÓTTIR - ABCÇDEFGĞHIIJKLMNOÖPRSŞTUÜVYZ - Тกว
(62) วกт - zyvüutşsrpöonmlkjiıhğgfedçcba - rittódsdnumðuG kröjB - I
(20) Björk Guðmundsdóttir
(20) BJÖRK GUÐMUNDSDÓTTIR
I1: 0
I2: 0
I3: -32
I4: 32

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment