Last active
April 30, 2021 11:41
-
-
Save ozdemirburak/89a7a1673cb65ce83469 to your computer and use it in GitHub Desktop.
UTF8, UTF16, UTF32, ISO8859 conversions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <errno.h> | |
#include <stdint.h> | |
#include <stddef.h> | |
/* Buffer and file helpers. */
int getSize(unsigned char *data);
unsigned char *fileToBuffer(unsigned char *filename);
void writeImageFromBuffer(const char *filename, const char *buffer, unsigned long length);
void createFile(unsigned char *infile, unsigned char *outfile, unsigned char *charset);

/* File-name and charset-name helpers. */
unsigned char *replaceCharset(const char *str, const char *old, const char *new);
unsigned char *extractCharset(const char *str);
const char *getFileExtension(const char *filename);
const char *getBeforeExtension(const char *filename);

/* UTF-8 decoding primitives. */
size_t utf8_charlen(uint8_t c);
size_t utf8_valid(const uint8_t *c);
uint32_t utf8_to_32(const uint8_t *c);

/* Converters; each takes an input buffer and a caller-supplied output buffer. */
unsigned char *UTF8_TO_UTF32(unsigned char *in, unsigned char *out);
unsigned char *UTF32_TO_UTF8(unsigned char *in, unsigned char *out);
unsigned char *UTF8_TO_UTF16(unsigned char *in, unsigned char *out);
unsigned char *UTF16_TO_UTF8(unsigned char *in, unsigned char *out);
unsigned char *ISO8859_TO_UTF8(unsigned char *in, unsigned char *out, unsigned int version);
unsigned char *UTF8_TO_ISO8859(unsigned char *in, unsigned char *out, int version);
unsigned char *convert(unsigned char *in, unsigned char *out, const char *from, const char *to);
/*
 * Returns the number of bytes in `data` before the first '\0' byte.
 *
 * A NULL pointer is treated as an empty buffer and yields 0, so callers can
 * pass the result of a failed read without crashing.
 */
int getSize(unsigned char *data)
{
    int datasize = 0;

    if (data == NULL) {
        return 0;
    }
    while (data[datasize] != '\0') {
        datasize++;
    }
    return datasize;
}
/*
 * Reads the whole file `filename` into a freshly allocated buffer.
 *
 * The buffer holds the file contents followed by a single '\0' byte, so an
 * empty file yields a valid one-byte buffer. The file is opened in binary
 * mode so that the size reported by ftell() matches the bytes read.
 *
 * Returns the buffer (the caller must free() it), or NULL if the file cannot
 * be opened, sized, allocated for, or read. Diagnostics go to stderr.
 */
unsigned char *fileToBuffer(unsigned char *filename)
{
    unsigned char *source = NULL;
    FILE *fp;
    long bufsize;
    size_t newLen;

    if (filename == NULL) {
        fprintf(stderr, "Error occured in fileToBuffer function while trying to access to file \n");
        return NULL;
    }

    fp = fopen((const char *)filename, "rb");
    if (fp == NULL) {
        fprintf(stderr, "Error occured in fileToBuffer function while trying to access to file \n");
        return NULL;
    }

    /* Seek to the end to learn the file size. */
    if (fseek(fp, 0L, SEEK_END) != 0) {
        fprintf(stderr, "Error occured in fileToBuffer function while trying to get size \n");
        goto out_close;
    }
    bufsize = ftell(fp);
    if (bufsize < 0) {
        fprintf(stderr, "Error occured in fileToBuffer function while trying to get size \n");
        goto out_close;
    }
    if (fseek(fp, 0L, SEEK_SET) != 0) {
        fprintf(stderr, "Error occured in fileToBuffer function while trying to go to start \n");
        goto out_close;
    }

    /* One extra byte for the terminator. */
    source = malloc((size_t)bufsize + 1);
    if (source == NULL) {
        fputs("Error reading file", stderr);
        goto out_close;
    }

    newLen = fread(source, 1, (size_t)bufsize, fp);
    if (ferror(fp)) {
        fputs("Error reading file", stderr);
        free(source);
        source = NULL;
        goto out_close;
    }
    /* Terminate directly after the last byte actually read. */
    source[newLen] = '\0';

out_close:
    fclose(fp);
    return source;
}
/*
 * Returns a newly allocated copy of `str` in which every non-overlapping
 * occurrence of `old` is replaced by `new`.
 *
 * An empty `old` matches nothing, so the result is a plain copy of `str`
 * (searching for an empty needle would otherwise never advance).
 *
 * Returns NULL if any argument is NULL or if allocation fails. The caller
 * owns the returned string and must free() it.
 */
unsigned char *replaceCharset(const char *str, const char *old, const char *new)
{
    size_t oldlen;
    size_t newlen;
    size_t count = 0;
    size_t retlen;
    size_t i;
    const char *p;
    const char *q;
    char *ret;
    char *r;

    if (str == NULL || old == NULL || new == NULL) {
        return NULL;
    }
    oldlen = strlen(old);
    newlen = strlen(new);

    /* First pass: count the matches so the result can be sized exactly. */
    if (oldlen != 0) {
        for (p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
            count++;
        }
    }
    retlen = strlen(str) - count * oldlen + count * newlen;

    ret = malloc(retlen + 1);
    if (ret == NULL) {
        return NULL;
    }

    /* Second pass: copy the text between matches and splice in `new`. */
    r = ret;
    p = str;
    for (i = 0; i < count; i++) {
        size_t gap;

        q = strstr(p, old); /* cannot fail: the match was counted above */
        gap = (size_t)(q - p);
        memcpy(r, p, gap);
        r += gap;
        memcpy(r, new, newlen);
        r += newlen;
        p = q + oldlen;
    }
    strcpy(r, p);
    return (unsigned char *)ret;
}
/*
 * Guesses a charset name from the text `str` by substring search.
 *
 * The candidates are tried in a fixed order (utf-8, utf-16, utf-32,
 * iso-8859-1, iso-8859-9) and the first hit wins. When nothing matches, or
 * `str` is NULL, "utf-8" is returned.
 *
 * The result points to a string literal: it must not be modified or freed.
 */
unsigned char *extractCharset(const char *str)
{
    /* Each row: two spellings to look for, then the canonical name. */
    static const char *const patterns[][3] = {
        {"utf8", "utf-8", "utf-8"},
        {"utf16", "utf-16", "utf-16"},
        {"utf32", "utf-32", "utf-32"},
        {"iso-8859-1", "88591", "iso-8859-1"},
        {"iso-8859-9", "88599", "iso-8859-9"},
    };
    size_t i;

    if (str != NULL) {
        for (i = 0; i < sizeof patterns / sizeof patterns[0]; i++) {
            if (strstr(str, patterns[i][0]) != NULL || strstr(str, patterns[i][1]) != NULL) {
                return (unsigned char *)patterns[i][2];
            }
        }
    }
    /* Not found: fall back to utf-8. */
    return (unsigned char *)"utf-8";
}
/*
 * Returns the extension of `filename`, including the leading '.'.
 *
 * The extension starts at the last '.' in the string. The result points into
 * `filename`; when there is no '.', or `filename` is NULL, it is an empty
 * string literal. The result must not be freed.
 */
const char *getFileExtension(const char *filename)
{
    const char *dot;

    if (filename == NULL) {
        return "";
    }
    dot = strrchr(filename, '.');
    return dot != NULL ? dot : "";
}
/*
 * Returns a newly allocated copy of the part of `filename` that precedes its
 * first '.' character.
 *
 * When `filename` contains no '.', the whole name is copied. The result is
 * always NUL-terminated. Returns NULL if `filename` is NULL or allocation
 * fails. The caller owns the returned string and must free() it.
 */
const char *getBeforeExtension(const char *filename)
{
    size_t len;
    char *stem;

    if (filename == NULL) {
        return NULL;
    }
    /* strcspn stops at the first '.' or at the terminator, whichever is first. */
    len = strcspn(filename, ".");
    stem = malloc(len + 1);
    if (stem == NULL) {
        return NULL;
    }
    memcpy(stem, filename, len);
    stem[len] = '\0';
    return stem;
}
/*
 * Classifies `c` as the first byte of a UTF-8 sequence and returns the
 * sequence length it announces (1 to 4), or 0 when `c` cannot start a
 * sequence.
 */
size_t utf8_charlen(uint8_t c)
{
    size_t length = 0; /* 0 = not a usable lead byte */

    if (c <= 0x7f) {
        length = 1; /* 0xxxxxxx */
    } else if (c >= 0xc0 && c <= 0xdf) {
        length = 2; /* 110xxxxx */
    } else if (c >= 0xe0 && c <= 0xef) {
        length = 3; /* 1110xxxx */
    } else if (c >= 0xf0 && c <= 0xf4) {
        length = 4; /* 11110xxx, capped at 0xf4 */
    }
    return length;
}
/*
 * Validates the UTF-8 sequence starting at `c`.
 *
 * Returns the sequence length (1 to 4) when the sequence is well formed, or
 * 0 when it is not. A sequence is rejected when:
 *   - the first byte is not a lead byte,
 *   - a trailing byte is not of the form 10xxxxxx,
 *   - it is an overlong encoding,
 *   - it encodes a surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
 *
 * Trailing bytes are checked front to back and checking stops at the first
 * bad one, so a sequence cut short by the string terminator is never read
 * past that terminator.
 */
size_t utf8_valid(const uint8_t *c)
{
    size_t clen = utf8_charlen(*c);
    size_t i;

    if (clen == 0) {
        return 0; /* invalid lead byte */
    }
    for (i = 1; i < clen; i++) {
        if ((c[i] & 0xc0) != 0x80) {
            return 0;
        }
    }

    switch (clen) {
    case 2:
        if (c[0] < 0xc2) {
            return 0; /* overlong: fits in one byte */
        }
        break;
    case 3:
        if (c[0] == 0xe0 && c[1] < 0xa0) {
            return 0; /* overlong: fits in two bytes */
        }
        if (c[0] == 0xed && c[1] > 0x9f) {
            return 0; /* surrogate range */
        }
        break;
    case 4:
        if (c[0] == 0xf0 && c[1] < 0x90) {
            return 0; /* overlong: fits in three bytes */
        }
        if (c[0] == 0xf4 && c[1] > 0x8f) {
            return 0; /* above U+10FFFF */
        }
        break;
    default:
        break;
    }
    return clen;
}
/*
 * Decodes the UTF-8 sequence starting at `c` into a code point.
 *
 * The sequence is checked with utf8_valid() first; 0 is returned when that
 * check fails.
 */
uint32_t utf8_to_32(const uint8_t *c)
{
    /* Payload bits of the first byte, indexed by sequence length. */
    static const uint8_t lead_mask[5] = {0x00, 0xff, 0x1f, 0x0f, 0x07};
    size_t len = utf8_valid(c);
    uint32_t cp;
    size_t i;

    if (len == 0 || len > 4) {
        return 0; /* invalid utf8 */
    }
    cp = (uint32_t)(c[0] & lead_mask[len]);
    /* Each trailing byte contributes its low six bits. */
    for (i = 1; i < len; i++) {
        cp = (cp << 6) | (uint32_t)(c[i] & 0x3f);
    }
    return cp;
}
/*
 * Converts the NUL-terminated UTF-8 string `in` to UTF-32 in `out`.
 *
 * Each code point is stored as one 4-byte unit in the machine's native byte
 * order, and the output ends with an all-zero unit. `out` must have room for
 * 4 * (strlen(in) + 1) bytes.
 *
 * A byte that does not start a decodable sequence is replaced by U+FFFD and
 * skipped on its own, so malformed input always makes progress and the
 * terminator is never stepped over.
 *
 * Returns `out`.
 */
unsigned char *UTF8_TO_UTF32(unsigned char *in, unsigned char *out)
{
    unsigned char *dst = out;
    const unsigned char *c = in;
    uint32_t cp;

    if (in == NULL || out == NULL) {
        return out;
    }
    while (*c != '\0') {
        size_t len = utf8_charlen(*c);

        cp = utf8_to_32(c);
        /* *c is non-zero here, so a zero result can only mean "invalid". */
        if (len == 0 || cp == 0) {
            cp = 0xFFFD;
            len = 1;
        }
        memcpy(dst, &cp, sizeof cp);
        dst += sizeof cp;
        c += len;
    }
    cp = 0;
    memcpy(dst, &cp, sizeof cp);
    return out;
}
/*
 * Converts UTF-32 text in `in` to a NUL-terminated UTF-8 string in `out`.
 *
 * `in` is read as consecutive 4-byte units in the machine's native byte
 * order, up to (not including) the first all-zero unit. `out` must have room
 * for 4 bytes per input unit plus one terminator byte.
 *
 * Surrogate values and values above U+10FFFF are not valid code points and
 * are written as U+FFFD. Only 1- to 4-byte UTF-8 sequences are produced.
 *
 * Returns `out`.
 */
unsigned char *UTF32_TO_UTF8(unsigned char *in, unsigned char *out)
{
    unsigned char *dst = out;
    uint32_t cp;

    if (in == NULL || out == NULL) {
        return out;
    }
    for (;; in += sizeof cp) {
        /* memcpy avoids alignment and aliasing problems with the byte buffer. */
        memcpy(&cp, in, sizeof cp);
        if (cp == 0) {
            break;
        }
        if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
            cp = 0xFFFD;
        }

        if (cp < 0x80) {
            *dst++ = (unsigned char)cp;
        } else if (cp < 0x800) {
            *dst++ = (unsigned char)(0xC0 | (cp >> 6));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            *dst++ = (unsigned char)(0xE0 | (cp >> 12));
            *dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        } else {
            *dst++ = (unsigned char)(0xF0 | (cp >> 18));
            *dst++ = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
            *dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        }
    }
    *dst = '\0';
    return out;
}
/*
 * Converts the NUL-terminated UTF-8 string `in` to UTF-16 in `out`.
 *
 * Code units are stored as 2-byte values in the machine's native byte order,
 * and the output ends with an all-zero unit. Code points above U+FFFF become
 * a surrogate pair. `out` must have room for 2 * (strlen(in) + 1) bytes.
 *
 * A byte that does not start a decodable sequence, or a decoded value that
 * is not a valid code point, is written as U+FFFD. The input is not modified.
 *
 * Returns `out`.
 */
unsigned char *UTF8_TO_UTF16(unsigned char *in, unsigned char *out)
{
    unsigned char *dst = out;
    const unsigned char *c = in;
    uint16_t unit;

    if (in == NULL || out == NULL) {
        return out;
    }
    while (*c != '\0') {
        size_t len = utf8_charlen(*c);
        uint32_t cp = utf8_to_32(c);

        /* *c is non-zero here, so a zero result can only mean "invalid". */
        if (len == 0 || cp == 0) {
            cp = 0xFFFD;
            len = 1;
        } else if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
            cp = 0xFFFD;
        }

        if (cp >= 0x10000) {
            /* Split the 20 bits above the BMP across a surrogate pair. */
            cp -= 0x10000;
            unit = (uint16_t)(0xD800 | (cp >> 10));
            memcpy(dst, &unit, sizeof unit);
            dst += sizeof unit;
            unit = (uint16_t)(0xDC00 | (cp & 0x3FF));
        } else {
            unit = (uint16_t)cp;
        }
        memcpy(dst, &unit, sizeof unit);
        dst += sizeof unit;
        c += len;
    }
    unit = 0;
    memcpy(dst, &unit, sizeof unit);
    return out;
}
/*
 * Converts UTF-16 text in `in` to a NUL-terminated UTF-8 string in `out`.
 *
 * `in` is read as consecutive 2-byte units in the machine's native byte
 * order, up to (not including) the first all-zero unit. A high surrogate
 * followed by a low surrogate is combined into one code point; an unpaired
 * surrogate is written as U+FFFD. `out` must have room for 3 bytes per input
 * unit plus one terminator byte.
 *
 * Returns `out`.
 */
unsigned char *UTF16_TO_UTF8(unsigned char *in, unsigned char *out)
{
    unsigned char *dst = out;
    uint16_t unit;

    if (in == NULL || out == NULL) {
        return out;
    }
    for (;;) {
        uint32_t cp;

        /* memcpy avoids alignment and aliasing problems with the byte buffer. */
        memcpy(&unit, in, sizeof unit);
        if (unit == 0) {
            break;
        }
        in += sizeof unit;
        cp = unit;

        if (unit >= 0xD800 && unit <= 0xDBFF) {
            uint16_t low;

            /* Safe to peek: at worst this reads the terminating unit. */
            memcpy(&low, in, sizeof low);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                cp = 0x10000 + (((uint32_t)unit - 0xD800) << 10) + ((uint32_t)low - 0xDC00);
                in += sizeof low;
            } else {
                cp = 0xFFFD; /* high surrogate without its partner */
            }
        } else if (unit >= 0xDC00 && unit <= 0xDFFF) {
            cp = 0xFFFD; /* stray low surrogate */
        }

        if (cp < 0x80) {
            *dst++ = (unsigned char)cp;
        } else if (cp < 0x800) {
            *dst++ = (unsigned char)(0xC0 | (cp >> 6));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            *dst++ = (unsigned char)(0xE0 | (cp >> 12));
            *dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        } else {
            *dst++ = (unsigned char)(0xF0 | (cp >> 18));
            *dst++ = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
            *dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        }
    }
    *dst = '\0';
    return out;
}
/*
 * UTF-8 encoding layout:
 *
 * Bits of CP  First     Last       Bytes  Byte 1    Byte 2    Byte 3    Byte 4
 *  7          U+0000    U+007F     1      0xxxxxxx  -         -         -
 * 11          U+0080    U+07FF     2      110xxxxx  10xxxxxx  -         -
 * 16          U+0800    U+FFFF     3      1110xxxx  10xxxxxx  10xxxxxx  -
 * 21          U+10000   U+10FFFF   4      11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
 */
/*
 * Converts the NUL-terminated ISO-8859 string `in` to a NUL-terminated UTF-8
 * string in `out`.
 *
 * `version` selects the source charset: 9 means ISO-8859-9 (Turkish); any
 * other value is treated as ISO-8859-1. The two charsets differ only at six
 * positions, which ISO-8859-9 assigns to Turkish letters:
 *
 *   0xD0 -> U+011E (G breve)      0xF0 -> U+011F (g breve)
 *   0xDD -> U+0130 (I dotted)     0xFD -> U+0131 (i dotless)
 *   0xDE -> U+015E (S cedilla)    0xFE -> U+015F (s cedilla)
 *
 * Every other byte maps to the code point with the same value. Each input
 * byte produces one or two output bytes, so `out` must have room for
 * 2 * strlen(in) + 1 bytes.
 *
 * For version 9, a byte pair that already is the UTF-8 encoding of one of
 * the six Turkish letters is copied through unchanged.
 * NOTE(review): that pass-through is ambiguous with genuine ISO-8859-9 text
 * (e.g. 0xC4 0xB0); it is kept because the original code did it on purpose.
 *
 * Returns `out`.
 */
unsigned char *ISO8859_TO_UTF8(unsigned char *in, unsigned char *out, unsigned int version)
{
    unsigned char *dst = out;

    if (in == NULL || out == NULL) {
        return out;
    }
    while (*in != '\0') {
        unsigned int cp = *in;

        if (cp < 0x80) {
            /* ASCII is identical in both encodings. */
            *dst++ = *in++;
            continue;
        }

        if (version == 9) {
            int already_utf8 =
                (in[0] == 0xC4 && (in[1] == 0x9E || in[1] == 0x9F || in[1] == 0xB0 || in[1] == 0xB1)) ||
                (in[0] == 0xC5 && (in[1] == 0x9E || in[1] == 0x9F));

            if (already_utf8) {
                *dst++ = *in++;
                *dst++ = *in++;
                continue;
            }
            switch (cp) {
            case 0xD0: cp = 0x011E; break;
            case 0xF0: cp = 0x011F; break;
            case 0xDD: cp = 0x0130; break;
            case 0xFD: cp = 0x0131; break;
            case 0xDE: cp = 0x015E; break;
            case 0xFE: cp = 0x015F; break;
            default: break;
            }
        }

        /* cp is in 0x80..0x7FF here: always a two-byte sequence. */
        *dst++ = (unsigned char)(0xC0 | (cp >> 6));
        *dst++ = (unsigned char)(0x80 | (cp & 0x3F));
        in++;
    }
    *dst = '\0';
    return out;
}
/*
 * Converts the NUL-terminated UTF-8 string `in` to a NUL-terminated ISO-8859
 * string in `out`.
 *
 * `version` selects the target charset: 9 means ISO-8859-9 (Turkish); any
 * other value is treated as ISO-8859-1. For ISO-8859-9 the six Turkish
 * letters map as follows:
 *
 *   U+011E -> 0xD0    U+011F -> 0xF0    U+0130 -> 0xDD
 *   U+0131 -> 0xFD    U+015E -> 0xDE    U+015F -> 0xFE
 *
 * and the six ISO-8859-1 characters that ISO-8859-9 gave up for them
 * (U+00D0, U+00DD, U+00DE, U+00F0, U+00FD, U+00FE) cannot be represented.
 *
 * Each character that cannot be represented in the target charset, and each
 * malformed or truncated sequence, is written as a single '?'. The output is
 * never longer than the input, so `out` needs strlen(in) + 1 bytes.
 *
 * Returns `out`.
 */
unsigned char *UTF8_TO_ISO8859(unsigned char *in, unsigned char *out, int version)
{
    unsigned char *dst = out;

    if (in == NULL || out == NULL) {
        return out;
    }
    while (*in != '\0') {
        unsigned int cp;
        unsigned int mapped;
        size_t len;
        size_t i;

        if (*in < 0x80) {
            /* ASCII is identical in both encodings. */
            *dst++ = *in++;
            continue;
        }

        if ((*in & 0xE0) == 0xC0) {
            len = 2;
            cp = *in & 0x1F;
        } else if ((*in & 0xF0) == 0xE0) {
            len = 3;
            cp = *in & 0x0F;
        } else if ((*in & 0xF8) == 0xF0) {
            len = 4;
            cp = *in & 0x07;
        } else {
            /* Stray continuation byte or invalid lead byte. */
            *dst++ = '?';
            in++;
            continue;
        }

        /* Stops at the first non-continuation byte, so never passes the NUL. */
        for (i = 1; i < len && (in[i] & 0xC0) == 0x80; i++) {
            cp = (cp << 6) | (in[i] & 0x3Fu);
        }
        in += i;
        if (i != len) {
            *dst++ = '?'; /* truncated sequence */
            continue;
        }

        mapped = cp;
        if (version == 9) {
            switch (cp) {
            case 0x011E: mapped = 0xD0; break;
            case 0x011F: mapped = 0xF0; break;
            case 0x0130: mapped = 0xDD; break;
            case 0x0131: mapped = 0xFD; break;
            case 0x015E: mapped = 0xDE; break;
            case 0x015F: mapped = 0xFE; break;
            case 0xD0:
            case 0xDD:
            case 0xDE:
            case 0xF0:
            case 0xFD:
            case 0xFE:
                mapped = 0x100; /* these slots hold Turkish letters instead */
                break;
            default:
                break;
            }
        }

        /* Below 0x80 would be an overlong ASCII encoding; above 0xFF does not fit. */
        if (mapped >= 0x80 && mapped <= 0xFF) {
            *dst++ = (unsigned char)mapped;
        } else {
            *dst++ = '?';
        }
    }
    *dst = '\0';
    return out;
}
/* Charsets understood by convert(). */
enum {
    CONVERT_CS_UNKNOWN = -1,
    CONVERT_CS_UTF8,
    CONVERT_CS_UTF16,
    CONVERT_CS_UTF32,
    CONVERT_CS_ISO1,
    CONVERT_CS_ISO9
};

/* Maps a charset name to its CONVERT_CS_* id; unknown or NULL names give CONVERT_CS_UNKNOWN. */
static int convertCharsetId(const char *name)
{
    static const char *const names[] = {"utf-8", "utf-16", "utf-32", "iso-8859-1", "iso-8859-9"};
    size_t i;

    if (name == NULL) {
        return CONVERT_CS_UNKNOWN;
    }
    for (i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (strcmp(name, names[i]) == 0) {
            return (int)i;
        }
    }
    return CONVERT_CS_UNKNOWN;
}

/* Counts the bytes of `in` before the first all-zero unit of `unitSize` bytes. */
static size_t convertInputBytes(const unsigned char *in, size_t unitSize)
{
    size_t n = 0;

    for (;;) {
        size_t i;
        int nonzero = 0;

        for (i = 0; i < unitSize; i++) {
            if (in[n + i] != 0) {
                nonzero = 1;
            }
        }
        if (!nonzero) {
            return n;
        }
        n += unitSize;
    }
}

/* Runs the converter that turns charset `src` into UTF-8. */
static unsigned char *convertToUtf8(unsigned char *in, unsigned char *out, int src)
{
    switch (src) {
    case CONVERT_CS_UTF16: return UTF16_TO_UTF8(in, out);
    case CONVERT_CS_UTF32: return UTF32_TO_UTF8(in, out);
    case CONVERT_CS_ISO1:  return ISO8859_TO_UTF8(in, out, 1);
    case CONVERT_CS_ISO9:  return ISO8859_TO_UTF8(in, out, 9);
    default:               return in;
    }
}

/* Runs the converter that turns UTF-8 into charset `dst`. */
static unsigned char *convertFromUtf8(unsigned char *in, unsigned char *out, int dst)
{
    switch (dst) {
    case CONVERT_CS_UTF16: return UTF8_TO_UTF16(in, out);
    case CONVERT_CS_UTF32: return UTF8_TO_UTF32(in, out);
    case CONVERT_CS_ISO1:  return UTF8_TO_ISO8859(in, out, 1);
    case CONVERT_CS_ISO9:  return UTF8_TO_ISO8859(in, out, 9);
    default:               return in;
    }
}

/*
 * Converts the text in `in` from charset `from` to charset `to`, writing the
 * result into `out`.
 *
 * Recognised names are "utf-8", "utf-16", "utf-32", "iso-8859-1" and
 * "iso-8859-9". A conversion that has UTF-8 on one side is a single
 * converter call. Any other pair goes through UTF-8: the input is first
 * converted into a temporary buffer, and that buffer (not `in`) is then
 * converted into `out`.
 *
 * To size the temporary buffer the input length is measured here: utf-16
 * input is scanned in 2-byte units and utf-32 input in 4-byte units up to an
 * all-zero unit; other input is scanned up to the first zero byte.
 *
 * Returns the value returned by the last converter (the converted text).
 * When either name is unrecognised or both names are equal, nothing is
 * converted and `in` is returned. Returns NULL if the temporary buffer
 * cannot be allocated.
 */
unsigned char *convert(unsigned char *in, unsigned char *out, const char *from, const char *to)
{
    int src = convertCharsetId(from);
    int dst = convertCharsetId(to);
    unsigned char *tmp;
    unsigned char *utf8;
    unsigned char *result;
    size_t unitSize;
    size_t inBytes;

    if (src == CONVERT_CS_UNKNOWN || dst == CONVERT_CS_UNKNOWN || src == dst) {
        return in;
    }
    if (in == NULL || out == NULL) {
        return NULL;
    }
    if (src == CONVERT_CS_UTF8) {
        return convertFromUtf8(in, out, dst);
    }
    if (dst == CONVERT_CS_UTF8) {
        return convertToUtf8(in, out, src);
    }

    /* Two-stage conversion through an intermediate UTF-8 buffer. */
    unitSize = (src == CONVERT_CS_UTF16) ? 2 : (src == CONVERT_CS_UTF32) ? 4 : 1;
    inBytes = convertInputBytes(in, unitSize);
    if (inBytes > (SIZE_MAX - 1) / 2) {
        return NULL;
    }
    /*
     * Twice the input size covers the worst case of every source charset
     * (ISO-8859: 1 byte -> 2; UTF-16: 2 -> 3; UTF-32: 4 -> 4). calloc keeps
     * the buffer NUL-terminated whatever the first stage writes.
     */
    tmp = calloc(2 * inBytes + 1, 1);
    if (tmp == NULL) {
        return NULL;
    }
    utf8 = convertToUtf8(in, tmp, src);
    result = convertFromUtf8(utf8, out, dst);
    free(tmp);
    return result;
}
/*
 * Writes `length` bytes from `buffer` to the file `filename`, creating or
 * truncating it. The file is opened in binary mode.
 *
 * Data is written in chunks of at most 1 MiB straight from `buffer`; no
 * scratch copy is made. With `length` 0 an empty file is created.
 *
 * Failures (bad arguments, file cannot be opened, short write, close error)
 * are reported on stderr; the function returns nothing.
 */
void writeImageFromBuffer(const char *filename, const char *buffer, unsigned long length)
{
    const size_t chunkSize = 1024 * 1024;
    unsigned long bytesWritten = 0;
    FILE *image;

    if (filename == NULL || (buffer == NULL && length > 0)) {
        fprintf(stderr, "writeImageFromBuffer: invalid argument\n");
        return;
    }

    image = fopen(filename, "wb");
    if (image == NULL) {
        fprintf(stderr, "writeImageFromBuffer: cannot open %s: %s\n", filename, strerror(errno));
        return;
    }

    while (bytesWritten < length) {
        unsigned long remaining = length - bytesWritten;
        size_t want = remaining < chunkSize ? (size_t)remaining : chunkSize;
        size_t done = fwrite(buffer + bytesWritten, 1, want, image);

        bytesWritten += done;
        if (done < want) {
            fprintf(stderr, "writeImageFromBuffer: short write to %s\n", filename);
            break;
        }
    }

    /* fclose flushes buffered data, so its result matters for a written file. */
    if (fclose(image) != 0) {
        fprintf(stderr, "writeImageFromBuffer: error closing %s\n", filename);
    }
}
/*
 * Reads `infile`, converts its contents from UTF-8 to ISO-8859-9 and writes
 * the result to `outfile`.
 *
 * Nothing is written when either file name is NULL, the input cannot be
 * read, or memory cannot be allocated. Both working buffers are released
 * before returning.
 *
 * NOTE(review): `charset` is not used; the conversion pair is hard-coded, as
 * it was before. Confirm with callers what `charset` is meant to select.
 */
void createFile(unsigned char *infile, unsigned char *outfile, unsigned char *charset)
{
    unsigned char *in;
    unsigned char *out;
    int datasize;

    (void)charset;

    if (infile == NULL || outfile == NULL) {
        return;
    }
    in = fileToBuffer(infile); /* in has the file data now */
    if (in == NULL) {
        return;
    }

    datasize = getSize(in);
    /*
     * The ISO-8859 output is never longer than the UTF-8 input; one extra
     * byte holds the terminator. Zero-filling guarantees getSize(out) stops
     * inside the buffer.
     */
    out = calloc((size_t)datasize + 1, 1);
    if (out == NULL) {
        free(in);
        return;
    }

    /* This conversion path writes its result into `out`, so read it from there. */
    convert(in, out, "utf-8", "iso-8859-9");
    writeImageFromBuffer((const char *)outfile, (const char *)out, (unsigned long)getSize(out));

    free(out);
    free(in);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment