Last active
September 27, 2015 21:20
-
-
Save bluebear94/804553f3e84f361f841b to your computer and use it in GitHub Desktop.
A UTF-8 file reader.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Temporary workaround until pre7 (per the original author's note):
// one-character strings whose character code is a power of two, used by
// chr() to assemble an arbitrary character by element-wise addition.

// Codes that can be written directly as literals.
let CHR_32 = " ";
let CHR_64 = "@";
let CHR_256 = "Ā";  // U+0100
let CHR_512 = "Ȁ";  // U+0200
let CHR_1024 = "Ѐ"; // Cyrillic ye with grave; U+0400

// Codes below 32 are derived by subtracting the space character (code 32).
let CHR_NULL = CHR_32 - CHR_32;
let CHR_1 = "!" - CHR_32; // "!" has code 33
let CHR_2 = CHR_1 + CHR_1;
let CHR_4 = CHR_2 + CHR_2;
let CHR_8 = CHR_4 + CHR_4;
let CHR_16 = CHR_8 + CHR_8;

// Remaining codes are obtained by doubling the previous power of two.
let CHR_128 = CHR_64 + CHR_64;
let CHR_2048 = CHR_1024 + CHR_1024;
let CHR_4096 = CHR_2048 + CHR_2048;
let CHR_8192 = CHR_4096 + CHR_4096;
let CHR_16384 = CHR_8192 + CHR_8192;
let CHR_32768 = CHR_16384 + CHR_16384;
// Builds a string holding the character with the given code point.
//   code: non-negative integer code point.
// Returns a one-character string for code < 65536, or a two-character
// UTF-16 surrogate pair (high surrogate, then low surrogate) otherwise.
// Relies on the file-level CHR_* power-of-two constants.
function chr(code) {
    if (code >= 65536) {
        // Supplementary plane: each surrogate carries 10 bits of the
        // 20-bit offset, so the offset is split at 1024 (2^10).
        // 55296 = 0xD800 (high surrogate base), 56320 = 0xDC00 (low base).
        let offset = code - 65536;
        return
            chr(truncate(offset / 1024) + 55296) ~
            chr(offset % 1024 + 56320);
    }
    // Binary decomposition: add the constant for every set bit, from the
    // highest (32768) down to the lowest (1).
    let res = CHR_NULL;
    if (code >= 32768) {code -= 32768; res += CHR_32768;}
    if (code >= 16384) {code -= 16384; res += CHR_16384;}
    if (code >= 8192) {code -= 8192; res += CHR_8192;}
    if (code >= 4096) {code -= 4096; res += CHR_4096;}
    if (code >= 2048) {code -= 2048; res += CHR_2048;}
    if (code >= 1024) {code -= 1024; res += CHR_1024;}
    if (code >= 512) {code -= 512; res += CHR_512;}
    if (code >= 256) {code -= 256; res += CHR_256;}
    if (code >= 128) {code -= 128; res += CHR_128;}
    if (code >= 64) {code -= 64; res += CHR_64;}
    if (code >= 32) {code -= 32; res += CHR_32;}
    if (code >= 16) {code -= 16; res += CHR_16;}
    if (code >= 8) {code -= 8; res += CHR_8;}
    if (code >= 4) {code -= 4; res += CHR_4;}
    if (code >= 2) {code -= 2; res += CHR_2;}
    if (code >= 1) {code -= 1; res += CHR_1;}
    return res;
}
// Decodes the first `len` entries of `chars`, taken as UTF-8 bytes, into a
// string.
//   chars: array of byte values; negative entries are treated as signed
//          bytes and shifted up by 256.
//   len:   number of entries of `chars` to decode.
// Returns ToString() of the concatenated chr() results.
// Raises an error on a stray continuation byte, an invalid lead byte
// (248..255), a sequence cut off by `len`, or a missing continuation byte.
function utf8CharsToString(chars, len) {
    let res = "";
    // Reads the byte at idx as an unsigned value.
    function nexc(idx) {
        let c = chars[idx];
        if (c < 0) {c += 256;}
        return c;
    }
    let i = 0;
    while (i < len) {
        let curr = nexc(i);
        if (curr < 128) {res = res ~ chr(curr);}
        else if (curr < 192) {RaiseError("Unexpected continuation byte");}
        // 11111xxx never starts a valid UTF-8 sequence.
        else if (curr >= 248) {RaiseError("Invalid lead byte");}
        else {
            // Strip the lead-byte marker bits and pick the sequence length:
            // 110xxxxx -> 1 extra byte, 1110xxxx -> 2, 11110xxx -> 3.
            let additionalBytes = 1;
            let resc = curr - 192;
            if (curr >= 240) {additionalBytes = 3; resc -= 48;}
            else if (curr >= 224) {additionalBytes = 2; resc -= 32;}
            ascent (j in 0 .. additionalBytes) {
                i++;
                // Do not read past the declared length on truncated input.
                if (i >= len) {RaiseError("Unexpected end of input");}
                let cont = nexc(i);
                if (cont < 128 || cont >= 192) {RaiseError("Continuation byte expected");}
                // Each continuation byte contributes its low 6 bits.
                resc = resc * 64 + (cont - 128);
            }
            res = res ~ chr(resc);
        }
        i++;
    }
    return ToString(res);
}
// Reads `len` bytes from the binary file object `handle` with
// ObjFileB_ReadByte and returns them decoded by utf8CharsToString.
//   handle: binary file object to read from.
//   len:    number of bytes to read.
function ObjFileB_ReadStringUTF8(handle, len) {
    let bytes = [];
    let count = 0;
    while (count < len) {
        let b = ObjFileB_ReadByte(handle);
        bytes = bytes ~ [b];
        count++;
    }
    return utf8CharsToString(bytes, len);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment