Skip to content

Instantly share code, notes, and snippets.

@p2004a
Last active December 19, 2015 13:40
Show Gist options
  • Save p2004a/5963899 to your computer and use it in GitHub Desktop.
Save p2004a/5963899 to your computer and use it in GitHub Desktop.
/*
Copyright 2013 Marek "p2004a" Rusinowski
*/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
/* 16-bit code unit; despite the name this is NOT an unsigned char. */
typedef unsigned short int uchar;

/*
 * Encode one 16-bit code unit as UTF-8.
 *
 * dest  output buffer; at most 3 bytes are written, and never more than the
 *       returned length
 * c     code unit to encode (0x0000..0xFFFF)
 *
 * Returns the number of bytes stored in dest (1, 2 or 3).
 *
 * Surrogate code units (0xD800..0xDFFF) are not paired; each one is encoded
 * on its own as a 3-byte sequence.
 */
static inline int u2c(char *dest, uchar c) {
    if (c < 0x0080) {
        /* 0xxxxxxx */
        dest[0] = (char) c;
        return 1;
    }
    if (c < 0x0800) {
        /* 110xxxxx 10xxxxxx */
        dest[0] = (char) (0xC0 | (c >> 6));
        dest[1] = (char) (0x80 | (c & 0x3F));
        return 2;
    }
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    dest[0] = (char) (0xE0 | (c >> 12));
    dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
    dest[2] = (char) (0x80 | (c & 0x3F));
    return 3;
}
/*
 * Decode one UTF-8 sequence starting at src into a single 16-bit code unit.
 *
 * dest  receives the decoded code unit
 * src   points at the first byte of the sequence; no length is passed in,
 *       so the caller must make sure that either the whole sequence
 *       announced by the lead byte is readable or the data is followed by a
 *       byte that is not a continuation byte (10xxxxxx)
 *
 * Returns the number of bytes consumed (1..4, never 0).
 *
 * U+FFFD is stored when the input cannot be represented: a stray
 * continuation byte, an invalid lead byte (0xF8..0xFF), a sequence cut short
 * by a non-continuation byte, or a 4-byte sequence (code point above
 * 0xFFFF). A cut-short sequence consumes only the bytes that belonged to it,
 * so the offending byte is decoded on its own by the next call.
 * Overlong encodings are accepted as-is.
 */
inline int c2u(uchar *dest, char *src) {
    const uchar replacement = 0xFFFD;
    const unsigned char lead = (unsigned char) src[0];

    if (lead < 0x80) {
        *dest = (uchar) lead;
        return 1;
    }

    int tail;            /* continuation bytes announced by the lead byte */
    unsigned long code;  /* payload bits collected so far */
    if ((lead & 0xE0) == 0xC0) {
        tail = 1;
        code = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {
        tail = 2;
        code = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {
        tail = 3;
        code = lead & 0x07;
    } else {
        /* 10xxxxxx on its own, or 0xF8..0xFF which never starts a sequence */
        *dest = replacement;
        return 1;
    }

    int used = 1;
    for (; tail > 0; --tail, ++used) {
        const unsigned char next = (unsigned char) src[used];
        if ((next & 0xC0) != 0x80) {
            /* sequence ended early; leave `next` for the following call */
            *dest = replacement;
            return used;
        }
        code = (code << 6) | (next & 0x3F);
    }

    /* anything above the 16-bit range does not fit in one uchar */
    *dest = (code > 0xFFFF) ? replacement : (uchar) code;
    return used;
}
/*
 * Convert an array of 16-bit code units to a newly allocated UTF-8 buffer.
 *
 * str   input code units
 * ulen  number of code units in str; values <= 0 are treated as empty
 * clen  out: number of bytes written to the result
 *
 * Returns a malloc'ed buffer (not NUL-terminated) that the caller must
 * free, or NULL with clen == 0 when the allocation fails.
 */
char *u2cs(uchar *str, int ulen, int &clen) {
    clen = 0;
    if (ulen < 0) {
        ulen = 0;
    }

    /*
     * Each code unit needs at most 3 bytes. A few spare bytes are kept at
     * the end in case the per-character encoder stores a whole 4-byte word
     * for the last character; they also keep the request non-zero for empty
     * input.
     */
    const size_t capacity = (size_t) ulen * 3 + 4;
    char *res = (char *) malloc(capacity);
    if (!res) {
        return NULL;
    }

    int written = 0;
    for (int i = 0; i < ulen; ++i) {
        written += u2c(res + written, str[i]);
    }
    clen = written;
    return res;
}
/*
 * Upper bound on the number of bytes a UTF-8 sequence starting with `lead`
 * can span: 1 for ASCII, otherwise 1 plus the number of consecutive set
 * bits counted downwards from 0x40.
 */
static int c2us_seq_span(char lead) {
    if (!(lead & 0x80)) {
        return 1;
    }
    int span = 1;
    for (int bit = 0x40; bit && (lead & bit); bit >>= 1) {
        ++span;
    }
    return span;
}

/*
 * Convert a UTF-8 byte buffer to a newly allocated array of 16-bit units.
 *
 * str   input bytes (need not be NUL-terminated)
 * clen  number of bytes in str; values < 0 are treated as empty
 * ulen  out: number of code units written to the result
 *
 * A leading UTF-8 byte order mark is skipped with a warning on stderr.
 *
 * Returns a malloc'ed array that the caller must free, or NULL with
 * ulen == 0 when the allocation fails. Empty input yields a valid non-NULL
 * pointer with ulen == 0.
 */
uchar *c2us(char *str, int clen, int &ulen) {
    ulen = 0;
    if (clen < 0) {
        clen = 0;
    }

    /* Only look at the first three bytes when they actually exist. */
    if (clen >= 3 && !memcmp(str, "\xEF\xBB\xBF", 3)) {
        fprintf(stderr, "WARNING: Passed utf8 string with BOM.\n");
        str += 3;
        clen -= 3;
    }

    /*
     * Every output unit consumes at least one input byte, so clen units is
     * always enough. Ask for at least one so empty input is not reported as
     * an allocation failure.
     */
    const size_t capacity = clen > 0 ? (size_t) clen : 1;
    uchar *res = (uchar *) malloc(sizeof(uchar) * capacity);
    if (!res) {
        return NULL;
    }

    int in = 0;
    int out = 0;
    while (in < clen) {
        if (c2us_seq_span(str[in]) > clen - in) {
            /*
             * The lead byte announces more bytes than remain. The decoder
             * takes no length, so do not call it here: emit U+FFFD for this
             * byte and move on.
             */
            res[out++] = 0xFFFD;
            ++in;
            continue;
        }
        in += c2u(res + out, str + in);
        ++out;
    }
    ulen = out;
    return res;
}
/*
 * Print the elapsed time p2 - p1 to stderr as "time: SS.NNNNNNNNN".
 *
 * p1  earlier timestamp
 * p2  later timestamp
 *
 * The nanosecond part is always normalised to 0..999999999, including the
 * case where both timestamps carry the same tv_nsec value.
 */
void print_tdiff(struct timespec &p1, struct timespec &p2) {
    long sec = (long) (p2.tv_sec - p1.tv_sec);
    long nsec = (long) p2.tv_nsec - (long) p1.tv_nsec;
    if (nsec < 0) {
        /* borrow one second so the fractional part is non-negative */
        nsec += 1000000000L;
        --sec;
    }
    fprintf(stderr, "time: %02ld.%09ld\n", sec, nsec);
}
/*
 * Benchmark driver: loads the file named by argv[1], converts it from UTF-8
 * to 16-bit code units and back, prints the CPU time of each conversion to
 * stderr and writes the re-encoded bytes to stdout.
 *
 * Exit status:
 *   0  success
 *   1  no file name given
 *   2  the file could not be opened or read into memory
 *   3  a conversion returned NULL
 *   4  writing the result to stdout failed
 */
int main(int argc, char *argv[]) {
    if (argc < 2) return 1;

    /* Binary mode: the byte count from ftell must match what fread returns. */
    FILE *file = fopen(argv[1], "rb");
    if (!file) return 2;

    long fsize = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        fsize = ftell(file);
    }
    int size = (int) fsize;
    /* The converters take int lengths, so reject sizes that do not fit. */
    if (fsize < 0 || (long) size != fsize || fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        return 2;
    }

    /*
     * The decoder is not given a length, so keep a zeroed tail after the
     * data: a sequence cut off at the end of the file then runs into
     * padding instead of leaving the allocation.
     */
    const size_t tail_pad = 8;
    char *content = (char *) calloc((size_t) size + tail_pad, 1);
    if (!content) {
        fclose(file);
        return 2;
    }

    size_t got = fread(content, 1, (size_t) size, file);
    int read_failed = ferror(file);
    fclose(file);
    if (read_failed) {
        free(content);
        return 2;
    }
    size = (int) got;  /* trust what was actually read */
    fprintf(stderr, "Loaded file\n");

    struct timespec p1, p2;

    int text_len = 0;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p1);
    uchar *text = c2us(content, size, text_len);
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p2);
    print_tdiff(p1, p2);
    if (!text) {
        free(content);
        return 3;
    }

    int out_len = 0;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p1);
    char *out = u2cs(text, text_len, out_len);
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p2);
    print_tdiff(p1, p2);

    int status = 0;
    if (!out) {
        status = 3;
    } else if (fwrite(out, 1, (size_t) out_len, stdout) != (size_t) out_len) {
        status = 4;
    }

    free(content);
    free(text);
    free(out);
    return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment