Skip to content

Instantly share code, notes, and snippets.

@bluebear94
Last active September 27, 2015 21:20
Show Gist options
  • Save bluebear94/804553f3e84f361f841b to your computer and use it in GitHub Desktop.
Save bluebear94/804553f3e84f361f841b to your computer and use it in GitHub Desktop.
A UTF-8 file reader.
// temp workaround till pre7
// Table of one-character values named after the character code each one is
// meant to hold: CHR_NULL for 0 and CHR_1 .. CHR_32768 for the powers of two.
// chr() adds a selection of these together to build an arbitrary character.
// NOTE(review): every computed entry presumably relies on the engine applying
// + and - to the character codes of one-character strings -- confirm against
// the engine version this script targets.
let CHR_NULL = " " - " "; // a character minus itself: code 0
let CHR_1 = "!" - " "; // U+0021 - U+0020 = 1
let CHR_2 = CHR_1 + CHR_1;
let CHR_4 = CHR_2 + CHR_2;
let CHR_8 = CHR_4 + CHR_4;
let CHR_16 = CHR_8 + CHR_8;
let CHR_32 = " "; // U+0020
let CHR_64 = "@"; // U+0040
let CHR_128 = "@" + "@"; // 64 + 64
let CHR_256 = "Ā"; // U+0100
let CHR_512 = "Ȁ"; // U+0200
let CHR_1024 = "Ѐ"; // U+0400, Cyrillic capital letter Ie with grave
// The remaining powers of two are built by doubling the previous entry.
let CHR_2048 = CHR_1024 + CHR_1024;
let CHR_4096 = CHR_2048 + CHR_2048;
let CHR_8192 = CHR_4096 + CHR_4096;
let CHR_16384 = CHR_8192 + CHR_8192;
let CHR_32768 = CHR_16384 + CHR_16384;
// Builds the character value for the Unicode code point `code`.
//
// code: code point to convert -- assumes a non-negative integer; TODO confirm
//       against callers.
// Returns: for code < 65536, CHR_NULL plus the CHR_* constants selected by the
//          binary digits of `code`; for code >= 65536, the high-surrogate
//          result concatenated (~) with the low-surrogate result.
function chr(code) {
    if (code >= 65536) {
        // Supplementary code point: encode as a UTF-16 surrogate pair.
        // After removing the 65536 offset, 20 bits remain; each surrogate
        // carries 10 of them, so the split point is 1024 (not 4096).
        // 55296 = 0xD800 (first high surrogate),
        // 56320 = 0xDC00 (first low surrogate).
        code -= 65536;
        return chr(truncate(code / 1024) + 55296) ~ chr(code % 1024 + 56320);
    }
    // Binary decomposition, highest bit first: whenever a power of two fits
    // in what is left of `code`, subtract it and add the matching constant.
    let res = CHR_NULL;
    if (code >= 32768) {code -= 32768; res += CHR_32768;}
    if (code >= 16384) {code -= 16384; res += CHR_16384;}
    if (code >= 8192) {code -= 8192; res += CHR_8192;}
    if (code >= 4096) {code -= 4096; res += CHR_4096;}
    if (code >= 2048) {code -= 2048; res += CHR_2048;}
    if (code >= 1024) {code -= 1024; res += CHR_1024;}
    if (code >= 512) {code -= 512; res += CHR_512;}
    if (code >= 256) {code -= 256; res += CHR_256;}
    if (code >= 128) {code -= 128; res += CHR_128;}
    if (code >= 64) {code -= 64; res += CHR_64;}
    if (code >= 32) {code -= 32; res += CHR_32;}
    if (code >= 16) {code -= 16; res += CHR_16;}
    if (code >= 8) {code -= 8; res += CHR_8;}
    if (code >= 4) {code -= 4; res += CHR_4;}
    if (code >= 2) {code -= 2; res += CHR_2;}
    if (code >= 1) {code -= 1; res += CHR_1;}
    return res;
}
// Decodes the first `len` entries of `chars` as UTF-8.
//
// chars: array of byte values; negative entries have 256 added before use, so
//        both signed and unsigned byte values are accepted.
// len:   number of entries of `chars` to decode.
// Returns: ToString() applied to the concatenation of chr() for every decoded
//          code point.
// Errors (via RaiseError):
//   - a continuation byte (128..191) where a lead byte is required,
//   - a lead byte of 248 or above, which no UTF-8 sequence starts with,
//   - a multi-byte sequence that runs past `len`,
//   - a non-continuation byte inside a multi-byte sequence,
//   - a decoded value above 1114111 (U+10FFFF).
function utf8CharsToString(chars, len) {
    let res = "";
    // Fetches entry `idx` of `chars`, adding 256 to negative values.
    function nexc(idx) {
        let c = chars[idx];
        if (c < 0) {c += 256;}
        return c;
    }
    let i = 0;
    while (i < len) {
        let curr = nexc(i);
        if (curr < 128) {res = res ~ chr(curr);}
        else if (curr < 192) {RaiseError("Unexpected continuation byte");}
        else if (curr >= 248) {RaiseError("Invalid lead byte");}
        else {
            // Lead byte: strip the length marker bits to get the payload.
            // 192..223 -> 1 more byte, 224..239 -> 2 more, 240..247 -> 3 more.
            let additionalBytes = 1;
            let resc = curr - 192;
            if (curr >= 240) {additionalBytes = 3; resc -= 48;}
            else if (curr >= 224) {additionalBytes = 2; resc -= 32;}
            ascent (j in 0 .. additionalBytes) {
                i++;
                // Do not read beyond the caller-supplied length.
                if (i >= len) {RaiseError("Unexpected end of input in multi-byte sequence");}
                let cont = nexc(i);
                if (cont < 128 || cont >= 192) {RaiseError("Continuation byte expected");}
                // Each continuation byte contributes its low 6 bits.
                resc = resc * 64 + (cont - 128);
            }
            // 1114111 = 0x10FFFF, the largest Unicode code point.
            if (resc > 1114111) {RaiseError("Code point out of range");}
            res = res ~ chr(resc);
        }
        i++;
    }
    return ToString(res);
}
// Reads `len` bytes from `handle`, one ObjFileB_ReadByte call per byte, and
// decodes the collected bytes with utf8CharsToString.
//
// handle: passed unchanged to every ObjFileB_ReadByte call.
// len:    number of bytes to read; also passed on as the length to decode.
// Returns: the result of utf8CharsToString for the bytes read.
function ObjFileB_ReadStringUTF8(handle, len) {
    let bytes = [];
    let count = 0;
    while (count < len) {
        let b = ObjFileB_ReadByte(handle);
        bytes = bytes ~ [b];
        count++;
    }
    return utf8CharsToString(bytes, len);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment