Created
March 12, 2022 09:59
-
-
Save X547/63707b092440dca894a826f5ba9f1ee0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MODULE TestUnicode2; | |
CONST | |
ph* = 0FFFDH; (* � U+FFFD, the placeholder code point: returned by Decode8 for malformed input, written by Encode8 for codes outside its CASE ranges and by Encode16 for codes it cannot store *) | |
bom* = 0FEFFH; (* U+FEFF (Unicode byte order mark); exported but not referenced by the procedures visible in this module *) | |
(* Returns the length in bytes of the UTF-8 sequence announced by the lead
   byte ch (1..6, using the extended 5- and 6-byte forms).
   Bytes that cannot start a sequence (080X..0BFX, 0FEX, 0FFX) yield 1. *)
PROCEDURE CharLen8* (ch: SHORTCHAR): INTEGER;
  VAR lead: INTEGER;
BEGIN
  lead := ORD(ch);
  IF lead < 0C0H THEN RETURN 1 (* ASCII, or a stray continuation byte *)
  ELSIF lead < 0E0H THEN RETURN 2
  ELSIF lead < 0F0H THEN RETURN 3
  ELSIF lead < 0F8H THEN RETURN 4
  ELSIF lead < 0FCH THEN RETURN 5
  ELSIF lead < 0FEH THEN RETURN 6
  ELSE RETURN 1 (* 0FEX and 0FFX never start a sequence *)
  END
END CharLen8;
(* Returns the number of bytes Encode8 writes for code.
   Non-negative codes use the shortest (extended, up to 6-byte) UTF-8 form.
   A negative code is not encodable: Encode8 writes the placeholder U+FFFD
   for it, which occupies 3 bytes, so 3 is returned. Without the ELSE branch
   a negative argument would abort the program, because a CASE statement
   with no matching label and no ELSE part traps. *)
PROCEDURE CharLenFromCode8* (code: INTEGER): INTEGER;
BEGIN
  CASE code OF
  | 0000000H .. 0000007FH: RETURN 1
  | 0000080H .. 000007FFH: RETURN 2
  | 0000800H .. 0000FFFFH: RETURN 3
  | 0010000H .. 001FFFFFH: RETURN 4
  | 0200000H .. 03FFFFFFH: RETURN 5
  | 4000000H .. 7FFFFFFFH: RETURN 6
  ELSE RETURN 3 (* code < 0: length of the encoded placeholder 0FFFDH *)
  END
END CharLenFromCode8;
(* Appends the (extended, up to 6-byte) UTF-8 encoding of code to str,
   starting at index pos, and advances pos past the bytes written.
   A negative code is replaced by the placeholder ph before encoding.
   No terminating 0X is written and no capacity check is made beyond the
   ordinary index check on each store. *)
PROCEDURE Encode8* (code: INTEGER; OUT str: ARRAY OF SHORTCHAR; VAR pos: INTEGER);
  VAR marker, weight: INTEGER;
BEGIN
  IF code < 0 THEN code := ph END; (* not encodable: emit the placeholder instead *)
  IF code < 80H THEN
    (* single byte, 0xxxxxxx *)
    str[pos] := SHORT(CHR(code)); INC(pos)
  ELSE
    (* marker: length bits of the lead byte;
       weight: place value of the payload bits carried by the lead byte *)
    IF code < 800H THEN marker := 0C0H; weight := 40H
    ELSIF code < 10000H THEN marker := 0E0H; weight := 1000H
    ELSIF code < 200000H THEN marker := 0F0H; weight := 40000H
    ELSIF code < 4000000H THEN marker := 0F8H; weight := 1000000H
    ELSE marker := 0FCH; weight := 40000000H
    END;
    str[pos] := SHORT(CHR(marker + code DIV weight)); INC(pos);
    (* continuation bytes 10xxxxxx, most significant 6-bit group first *)
    WHILE weight > 1 DO
      weight := weight DIV 40H;
      str[pos] := SHORT(CHR(80H + code DIV weight MOD 40H)); INC(pos)
    END
  END
END Encode8;
(* Decodes one (extended, up to 6-byte) UTF-8 sequence from str starting at
   index pos and returns its code point; pos is advanced past the bytes
   consumed.
   Returns the placeholder ph when
   - the byte at pos cannot start a sequence (080X..0BFX, 0FEX, 0FFX):
     that single byte is consumed;
   - a continuation byte is missing: the lead byte and the valid
     continuation bytes seen so far are consumed, the offending byte is not;
   - the sequence is longer than the shortest encoding of its value
     (overlong form): the whole sequence is consumed. *)
PROCEDURE Decode8* (IN str: ARRAY OF SHORTCHAR; VAR pos: INTEGER): INTEGER;
  VAR first, lead, next, pending, value: INTEGER;
BEGIN
  first := pos;
  lead := ORD(str[pos]); INC(pos);
  (* classify the lead byte: payload bits and number of continuation bytes *)
  IF lead < 80H THEN RETURN lead (* plain ASCII *)
  ELSIF lead < 0C0H THEN RETURN ph (* continuation byte in lead position *)
  ELSIF lead < 0E0H THEN pending := 1; value := lead MOD 20H
  ELSIF lead < 0F0H THEN pending := 2; value := lead MOD 10H
  ELSIF lead < 0F8H THEN pending := 3; value := lead MOD 8H
  ELSIF lead < 0FCH THEN pending := 4; value := lead MOD 4H
  ELSIF lead < 0FEH THEN pending := 5; value := lead MOD 2H
  ELSE RETURN ph (* 0FEX, 0FFX *)
  END;
  WHILE pending > 0 DO
    next := ORD(str[pos]);
    IF next DIV 40H # 2 THEN RETURN ph END; (* not of the form 10xxxxxx *)
    value := 40H*value + next MOD 40H;
    INC(pos); DEC(pending)
  END;
  (* reject overlong forms: the length must be the minimal one for value *)
  IF CharLenFromCode8(value) # pos - first THEN RETURN ph END;
  RETURN value
END Decode8;
(* Returns the number of UTF-16 code units in the character that starts
   with ch: 2 when ch is a high surrogate (0D800X..0DBFFX), otherwise 1. *)
PROCEDURE CharLen16* (ch: CHAR): INTEGER;
  VAR unit: INTEGER;
BEGIN
  unit := ORD(ch);
  IF (unit >= 0D800H) & (unit <= 0DBFFH) THEN
    RETURN 2
  END;
  RETURN 1
END CharLen16;
(* Returns the number of UTF-16 code units needed for code:
   2 for the supplementary range 10000H..10FFFFH, 1 for everything else
   (including surrogate values, negative values and values above 10FFFFH). *)
PROCEDURE CharLenFromCode16* (code: INTEGER): INTEGER;
BEGIN
  IF (code >= 10000H) & (code <= 10FFFFH) THEN
    RETURN 2 (* encoded as a surrogate pair *)
  ELSE
    RETURN 1
  END
END CharLenFromCode16;
(* Appends the UTF-16 encoding of code to str, starting at index pos, and
   advances pos past the units written.
   - 10000H..10FFFFH is written as a surrogate pair (2 units);
   - 0H..0D7FFH and 0E000H..0FFFFH are written as a single unit;
   - any other value (surrogate range, negative, above 10FFFFH) is written
     as the placeholder ph (1 unit). *)
PROCEDURE Encode16* (code: INTEGER; OUT str: ARRAY OF CHAR; VAR pos: INTEGER);
  VAR offset, unit: INTEGER;
BEGIN
  IF (code >= 10000H) & (code <= 10FFFFH) THEN
    (* offset = xxxxxxxxxx yyyyyyyyyy  (20 bits)
       str    = 110110xxxxxxxxxx 110111yyyyyyyyyy *)
    offset := code - 10000H;
    str[pos] := CHR(0D800H + offset DIV 400H); INC(pos);
    str[pos] := CHR(0DC00H + offset MOD 400H); INC(pos)
  ELSE
    IF (code >= 0H) & (code <= 0D7FFH) OR (code >= 0E000H) & (code <= 0FFFFH) THEN
      unit := code
    ELSE
      unit := ph (* not representable as a single unit *)
    END;
    str[pos] := CHR(unit); INC(pos)
  END
END Encode16;
(* Decodes one UTF-16 character from str starting at index pos and returns
   its code point; pos is advanced past the units consumed.
   - An ordinary unit (outside 0D800X..0DFFFX) is returned as is (1 unit).
   - A high surrogate followed by a low surrogate is combined into a code
     point in 10000H..10FFFFH (2 units).
   - A high surrogate that is not followed by a low surrogate (including a
     high surrogate in the last array element) yields the placeholder ph;
     only the high surrogate is consumed, so the following unit, e.g. the
     0X terminator, is still seen by the caller.
   - A low surrogate with no preceding high surrogate yields ph (1 unit).
   Previously the second unit was consumed without being checked, which
   produced bogus code points and could step over the string terminator. *)
PROCEDURE Decode16* (IN str: ARRAY OF CHAR; VAR pos: INTEGER): INTEGER;
  VAR hi, lo: INTEGER;
BEGIN
  hi := ORD(str[pos]); INC(pos);
  IF hi DIV 400H = 36H THEN (* 0D800H..0DBFFH: high surrogate *)
    IF pos < LEN(str) THEN
      lo := ORD(str[pos]);
      IF lo DIV 400H = 37H THEN (* 0DC00H..0DFFFH: low surrogate *)
        INC(pos);
        RETURN 10000H + 400H*(hi MOD 400H) + lo MOD 400H
      END
    END;
    RETURN ph (* unpaired high surrogate *)
  ELSIF hi DIV 400H = 37H THEN
    RETURN ph (* unpaired low surrogate *)
  ELSE
    RETURN hi
  END
END Decode16;
END TestUnicode2. | |
Placeholder character | |
� (U+FFFD) | |
UTF-8 | |
0xxxxxxx | |
110xxxxx 10xxxxxx | |
1110xxxx 10xxxxxx 10xxxxxx | |
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
𐍈 (U+10348) | |
000010000001101001000 -> 11110000, 10010000, 10001101, 10001000 | |
𩺊 (U+29E8A) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment