Skip to content

Instantly share code, notes, and snippets.

@X547
Created March 12, 2022 09:59
Show Gist options
  • Save X547/63707b092440dca894a826f5ba9f1ee0 to your computer and use it in GitHub Desktop.
Save X547/63707b092440dca894a826f5ba9f1ee0 to your computer and use it in GitHub Desktop.
MODULE TestUnicode2;
CONST
(* Code point U+FFFD (REPLACEMENT CHARACTER). The procedures of this module
   return or emit it whenever an input cannot be decoded or encoded. *)
ph* = 0FFFDH;
(* Code point U+FEFF (byte order mark). Exported for clients; none of the
   procedures visible in this module reference it. *)
bom* = 0FEFFH;
(* Length in bytes of the UTF-8 sequence announced by its first byte ch.
   Bytes that cannot start a sequence (continuation bytes 80H..0BFH and the
   values 0FEH, 0FFH) are reported as length 1 so a caller can step over them. *)
PROCEDURE CharLen8* (ch: SHORTCHAR): INTEGER;
	VAR lead, len: INTEGER;
BEGIN
	lead := ORD(ch);
	IF lead < 0C0H THEN len := 1	(* ASCII, or a stray continuation byte *)
	ELSIF lead < 0E0H THEN len := 2	(* 110xxxxx *)
	ELSIF lead < 0F0H THEN len := 3	(* 1110xxxx *)
	ELSIF lead < 0F8H THEN len := 4	(* 11110xxx *)
	ELSIF lead < 0FCH THEN len := 5	(* 111110xx *)
	ELSIF lead < 0FEH THEN len := 6	(* 1111110x *)
	ELSE len := 1	(* 0FEH, 0FFH: never valid as a lead byte *)
	END;
	RETURN len
END CharLen8;
(* Number of bytes the UTF-8 encoder of this module produces for code.
   Non-negative values use the classic 1..6 byte scheme (up to 31 bits).
   A negative value has no encoding of its own: Encode8 writes the 3-byte
   replacement character U+FFFD for it, so 3 is returned here as well.
   Previously the CASE had no ELSE branch and a negative argument trapped. *)
PROCEDURE CharLenFromCode8* (code: INTEGER): INTEGER;
BEGIN
	CASE code OF
	| 0000000H .. 0000007FH: RETURN 1;
	| 0000080H .. 000007FFH: RETURN 2;
	| 0000800H .. 0000FFFFH: RETURN 3;
	| 0010000H .. 001FFFFFH: RETURN 4;
	| 0200000H .. 03FFFFFFH: RETURN 5;
	| 4000000H .. 7FFFFFFFH: RETURN 6;
	ELSE
		(* code < 0: same length as the placeholder U+FFFD that Encode8 emits *)
		RETURN 3;
	END;
END CharLenFromCode8;
(* Appends the UTF-8 encoding of code to str starting at index pos and
   advances pos past the bytes written. Values up to 7FFFFFFFH are encoded
   with the classic 1..6 byte scheme; a negative value is replaced by the
   placeholder ph. Bytes are stored one at a time, so an index trap on a
   too-short array leaves pos just behind the last byte that was stored. *)
PROCEDURE Encode8* (code: INTEGER; OUT str: ARRAY OF SHORTCHAR; VAR pos: INTEGER);
	VAR lead, weight: INTEGER;
BEGIN
	IF code < 0 THEN code := ph END;
	(* lead: marker bits of the first byte; weight: 40H raised to the number
	   of continuation bytes, i.e. the place value of the first byte's payload *)
	IF code < 80H THEN lead := 0; weight := 1
	ELSIF code < 800H THEN lead := 0C0H; weight := 40H
	ELSIF code < 10000H THEN lead := 0E0H; weight := 1000H
	ELSIF code < 200000H THEN lead := 0F0H; weight := 40000H
	ELSIF code < 4000000H THEN lead := 0F8H; weight := 1000000H
	ELSE lead := 0FCH; weight := 40000000H
	END;
	str[pos] := SHORT(CHR(lead + code DIV weight)); INC(pos);
	(* remaining 6-bit groups, most significant first, each tagged 10xxxxxx *)
	WHILE weight > 1 DO
		weight := weight DIV 40H;
		str[pos] := SHORT(CHR(80H + code DIV weight MOD 40H)); INC(pos)
	END
END Encode8;
(* Decodes one UTF-8 sequence (classic 1..6 byte scheme) from str at pos and
   returns its code point; pos is advanced past the bytes consumed.
   The placeholder ph is returned when
   - the byte at pos cannot start a sequence (80H..0BFH, 0FEH, 0FFH);
     that single byte is consumed;
   - an expected continuation byte is not of the form 10xxxxxx; pos is left
     on the offending byte so decoding can resume there;
   - the sequence is longer than CharLenFromCode8 reports for the decoded
     value (overlong form); the whole sequence is consumed. *)
PROCEDURE Decode8* (IN str: ARRAY OF SHORTCHAR; VAR pos: INTEGER): INTEGER;
	VAR start, lead, unit, tail, code: INTEGER;
BEGIN
	start := pos;
	lead := ORD(str[pos]);
	(* tail: number of continuation bytes; code: payload bits of the lead byte *)
	IF lead < 80H THEN tail := 0; code := lead
	ELSIF lead < 0C0H THEN INC(pos); RETURN ph	(* stray continuation byte *)
	ELSIF lead < 0E0H THEN tail := 1; code := lead MOD 20H
	ELSIF lead < 0F0H THEN tail := 2; code := lead MOD 10H
	ELSIF lead < 0F8H THEN tail := 3; code := lead MOD 8H
	ELSIF lead < 0FCH THEN tail := 4; code := lead MOD 4H
	ELSIF lead < 0FEH THEN tail := 5; code := lead MOD 2H
	ELSE INC(pos); RETURN ph	(* 0FEH, 0FFH *)
	END;
	INC(pos);
	WHILE tail > 0 DO
		unit := ORD(str[pos]);
		(* top two bits must be 10 *)
		IF unit DIV 40H # 2 THEN RETURN ph END;
		code := code*40H + unit MOD 40H;
		INC(pos); DEC(tail)
	END;
	(* reject overlong encodings *)
	IF CharLenFromCode8(code) # pos - start THEN RETURN ph END;
	RETURN code
END Decode8;
(* Number of UTF-16 code units in the sequence that starts with ch:
   2 when ch is a high surrogate (0D800H..0DBFFH), otherwise 1. *)
PROCEDURE CharLen16* (ch: CHAR): INTEGER;
	VAR unit: INTEGER;
BEGIN
	unit := ORD(ch);
	IF (unit >= 0D800H) & (unit <= 0DBFFH) THEN RETURN 2 END;
	RETURN 1
END CharLen16;
(* Number of UTF-16 code units used for code: 2 for the supplementary
   range 10000H..10FFFFH, 1 for everything else. "Everything else" includes
   values that cannot be encoded (surrogates, negatives, values above
   10FFFFH); the result for those is 1 as well. *)
PROCEDURE CharLenFromCode16* (code: INTEGER): INTEGER;
BEGIN
	IF (code >= 10000H) & (code <= 10FFFFH) THEN
		RETURN 2
	ELSE
		RETURN 1
	END
END CharLenFromCode16;
(* Appends the UTF-16 encoding of code to str at index pos and advances pos
   past the units written. Supplementary code points 10000H..10FFFFH become
   a surrogate pair; code points of the basic plane outside the surrogate
   range are stored as is; anything else (surrogate values, negatives,
   values above 10FFFFH) is stored as the placeholder ph. *)
PROCEDURE Encode16* (code: INTEGER; OUT str: ARRAY OF CHAR; VAR pos: INTEGER);
	VAR offset, unit: INTEGER;
BEGIN
	IF (code >= 10000H) & (code <= 10FFFFH) THEN
		(* code - 10000H = xxxxxxxxxx yyyyyyyyyy (20 bits)
		   str = 110110xxxxxxxxxx 110111yyyyyyyyyy *)
		offset := code - 10000H;
		str[pos] := CHR(0D800H + offset DIV 400H); INC(pos);
		str[pos] := CHR(0DC00H + offset MOD 400H); INC(pos)
	ELSE
		IF ((code >= 0) & (code <= 0D7FFH)) OR ((code >= 0E000H) & (code <= 0FFFFH)) THEN
			unit := code
		ELSE
			unit := ph
		END;
		str[pos] := CHR(unit); INC(pos)
	END
END Encode16;
(* Decodes one UTF-16 sequence from str at pos and returns its code point;
   pos is advanced past the units consumed.
   - A high surrogate (0D800H..0DBFFH) directly followed by a low surrogate
     (0DC00H..0DFFFH) yields a supplementary code point; both units are
     consumed.
   - A high surrogate that is the last element of str, or that is followed
     by anything other than a low surrogate, yields the placeholder ph; only
     the high surrogate is consumed, so the following unit is decoded on its
     own by the next call.
   - A low surrogate without a preceding high surrogate yields ph.
   - Any other unit is returned unchanged.
   The earlier version never inspected the second unit: it combined the
   high surrogate with whatever followed, and indexed past the end of str
   when the high surrogate was the final element. *)
PROCEDURE Decode16* (IN str: ARRAY OF CHAR; VAR pos: INTEGER): INTEGER;
	VAR hi, lo: INTEGER;
BEGIN
	hi := ORD(str[pos]); INC(pos);
	IF hi DIV 400H = 36H THEN	(* high surrogate: 110110xxxxxxxxxx *)
		IF pos < LEN(str) THEN
			lo := ORD(str[pos]);
			IF lo DIV 400H = 37H THEN	(* low surrogate: 110111yyyyyyyyyy *)
				INC(pos);
				RETURN 10000H + 400H*(hi MOD 400H) + lo MOD 400H
			END
		END;
		RETURN ph	(* unpaired high surrogate *)
	ELSIF hi DIV 400H = 37H THEN
		RETURN ph	(* unpaired low surrogate *)
	ELSE
		RETURN hi
	END
END Decode16;
END TestUnicode2.
Placeholder character
� (U+FFFD)
UTF-8
0xxxxxxx
110xxxxx 10xxxxxx
1110xxxx 10xxxxxx 10xxxxxx
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
𐍈 (U+10348)
000010000001101001000 -> 11110000, 10010000, 10001101, 10001000
𩺊 (U+29E8A)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment