Last active
December 19, 2015 13:40
-
-
Save p2004a/5963899 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Copyright 2013 Marek "p2004a" Rusinowski
*/
#include <cstdio> | |
#include <cstdlib> | |
#include <cstring> | |
#include <ctime> | |
/* One 16-bit code unit; the converters below treat it as a single code point. */
typedef unsigned short int uchar;

/**
 * Encode one 16-bit code point as UTF-8.
 *
 * @param dest  output buffer; exactly the returned number of bytes is written
 *              (at most 3) and no terminator is appended
 * @param c     code point to encode
 * @return      number of bytes stored in dest: 1, 2 or 3
 *
 * Bytes are stored one at a time, so the result does not depend on the host
 * byte order or on the alignment of dest, and nothing past the encoded
 * sequence is touched.
 * Values in the surrogate range (0xD800..0xDFFF) are neither paired nor
 * rejected; each one is encoded as its own 3-byte sequence.
 */
inline int u2c(char *dest, uchar c) {
    if (c < 0x0080) {
        /* 0xxxxxxx */
        dest[0] = (char) c;
        return 1;
    }
    if (c < 0x0800) {
        /* 110xxxxx 10xxxxxx */
        dest[0] = (char) (0xC0 | (c >> 6));
        dest[1] = (char) (0x80 | (c & 0x3F));
        return 2;
    }
    /* 1110xxxx 10xxxxxx 10xxxxxx -- this range starts at 0x0800 inclusive */
    dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
    dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
    dest[2] = (char) (0x80 | (c & 0x3F));
    return 3;
}
/**
 * Decode one UTF-8 sequence starting at src into a single 16-bit code unit.
 *
 * @param dest  receives the decoded code point, or U+FFFD (replacement
 *              character) when the input cannot be represented
 * @param src   first byte of the sequence; up to 3 further bytes are read,
 *              stopping at the first one that is not a continuation byte, so
 *              the caller must make sure those bytes are readable
 * @return      number of bytes consumed from src (always at least 1)
 *
 * U+FFFD is produced for:
 *   - a stray continuation byte or an invalid lead byte (0xF8..0xFF),
 *     consuming 1 byte;
 *   - a sequence cut short by a non-continuation byte, consuming only the
 *     bytes that were part of the broken sequence;
 *   - a well-formed 4-byte sequence, because its value does not fit in 16
 *     bits, consuming all 4 bytes.
 * Overlong encodings are not rejected.
 */
inline int c2u(uchar *dest, char *src) {
    const uchar replacement = 0xFFFD;
    const unsigned char lead = (unsigned char) src[0];

    if (lead < 0x80) {
        *dest = (uchar) lead;
        return 1;
    }

    int extra;         /* continuation bytes announced by the lead byte */
    unsigned long cp;  /* code point accumulated so far */
    if ((lead & 0xE0) == 0xC0) {
        extra = 1;
        cp = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {
        extra = 2;
        cp = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {
        extra = 3;
        cp = lead & 0x07;
    } else {
        *dest = replacement;
        return 1;
    }

    int used = 1;
    for (; extra > 0; --extra, ++used) {
        const unsigned char next = (unsigned char) src[used];
        if ((next & 0xC0) != 0x80) {
            /* Leave the offending byte for the next call: it may start a
               valid sequence of its own. */
            *dest = replacement;
            return used;
        }
        cp = (cp << 6) | (unsigned long) (next & 0x3F);
    }

    *dest = (cp > 0xFFFFUL) ? replacement : (uchar) cp;
    return used;
}
/**
 * Encode an array of 16-bit code points as a UTF-8 byte string.
 *
 * @param str   code points to encode
 * @param ulen  number of elements in str
 * @param clen  set to the number of UTF-8 bytes produced (terminator excluded)
 * @return      malloc'ed, NUL-terminated buffer owned by the caller (release
 *              with free), or NULL with clen == 0 when ulen is negative or
 *              the allocation fails
 *
 * clen is an int, so inputs whose encoding would exceed INT_MAX bytes are not
 * supported.
 */
char *u2cs(uchar *str, int ulen, int &clen) {
    clen = 0;
    if (ulen < 0) {
        return NULL;
    }

    /* Each code point takes at most 3 bytes; the extra byte holds the
       terminator and doubles as slack behind the last encoded sequence. */
    const size_t capacity = (size_t) ulen * 3 + 1;
    char *res = (char *) malloc(capacity);
    if (!res) {
        return NULL;
    }

    for (int i = 0; i < ulen; ++i) {
        clen += u2c(res + clen, str[i]);
    }
    res[clen] = '\0';
    return res;
}
/**
 * Decode a UTF-8 byte buffer into an array of 16-bit code points.
 *
 * @param str   UTF-8 bytes; does not need to be NUL-terminated
 * @param clen  number of bytes in str (negative values are treated as 0)
 * @param ulen  set to the number of code points written to the result
 * @return      malloc'ed array owned by the caller (release with free), or
 *              NULL with ulen == 0 when the allocation fails
 *
 * A leading byte order mark (EF BB BF) is skipped after printing a warning
 * on stderr. If the last sequence announces more bytes than remain in the
 * buffer, a single U+FFFD is stored in its place and decoding stops, so no
 * byte beyond str[clen - 1] is ever handed to the decoder.
 */
uchar *c2us(char *str, int clen, int &ulen) {
    ulen = 0;
    if (clen < 0) {
        clen = 0;
    }

    /* Compare only when three bytes really exist; memcmp does not depend on
       a terminator being present. */
    if (clen >= 3 && memcmp(str, "\xEF\xBB\xBF", 3) == 0) {
        fprintf(stderr, "WARNING: Passed utf8 string with BOM.\n");
        str += 3;
        clen -= 3;
    }

    /* Every decoded unit consumes at least one byte, so clen units suffice.
       Ask for at least one so that empty input is not mistaken for an
       allocation failure. */
    uchar *res = (uchar *) malloc(sizeof(uchar) * (size_t) (clen > 0 ? clen : 1));
    if (!res) {
        return NULL;
    }

    int pos = 0;
    while (pos < clen) {
        /* Sequence length announced by the lead byte: one byte, plus one for
           every set bit that follows the top two set bits. */
        const unsigned char lead = (unsigned char) str[pos];
        int need = 1;
        if (lead >= 0xC0) {
            for (int bit = 0x40; lead & bit; bit >>= 1) {
                ++need;
            }
        }
        if (need > clen - pos) {
            res[ulen++] = (uchar) 0xFFFD;
            break;
        }
        pos += c2u(res + ulen, str + pos);
        ++ulen;
    }
    return res;
}
/**
 * Print the elapsed time between two timestamps on stderr as
 * "time: SS.NNNNNNNNN" (seconds, then nine digits of nanoseconds).
 *
 * @param p1  start timestamp
 * @param p2  end timestamp; expected not to be earlier than p1
 */
void print_tdiff(struct timespec &p1, struct timespec &p2) {
    long long s = (long long) p2.tv_sec - (long long) p1.tv_sec;
    long ns = (long) p2.tv_nsec - (long) p1.tv_nsec;
    if (ns < 0) {
        /* Borrow one second so the nanosecond part stays in 0..999999999. */
        ns += 1000000000L;
        --s;
    }
    fprintf(stderr, "time: %02lld.%09ld\n", s, ns);
}
int main(int argc, char *argv[]) { | |
if (argc < 2) return 1; | |
FILE *file = fopen(argv[1], "r"); | |
if (!file) return 2; | |
fseek(file, 0, SEEK_END); | |
int size = ftell(file); | |
fseek(file, 0, SEEK_SET); | |
char *content = (char *) malloc(size); | |
fread(content, 1, size, file); | |
fprintf(stderr, "Loaded file\n"); | |
fclose(file); | |
int text_len; | |
struct timespec p1, p2; | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p1); | |
uchar *text = c2us(content, size, text_len); | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p2); | |
print_tdiff(p1, p2); | |
if (!text) return 3; | |
int out_len; | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p1); | |
char *out = u2cs(text, text_len, out_len); | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &p2); | |
print_tdiff(p1, p2); | |
fwrite(out, 1, out_len, stdout); | |
free(content); | |
free(text); | |
free(out); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment