Created
December 1, 2024 02:43
-
-
Save max-dark/8922cd047164091c4053261a9df9fb7c to your computer and use it in GitHub Desktop.
utf decode
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://en.wikipedia.org/wiki/UTF-8 | |
| #include <iostream> | |
| #include <string> | |
| #include <iomanip> | |
// Byte type used for every UTF-8 bit pattern and mask below.
using u_char = unsigned char;

// Marker-bit patterns (`*_value` / `char_part`) paired with the masks
// (`*_mask` / `part_mask`) that select those marker bits in a byte.
// A byte `b` matches a pair when `(b & mask) == value`.
enum utf_mask: u_char
{
    // Continuation byte: 10xxxxxx
    char_part = 0b1000'0000,
    part_mask = 0b1100'0000,

    // Lead byte of a 1-byte sequence: 0xxxxxxx
    n1_value  = 0b0000'0000,
    n1_mask   = 0b1000'0000,

    // Lead byte of a 2-byte sequence: 110xxxxx
    n2_value  = 0b1100'0000,
    n2_mask   = 0b1110'0000,

    // Lead byte of a 3-byte sequence: 1110xxxx
    n3_value  = 0b1110'0000,
    n3_mask   = 0b1111'0000,

    // Lead byte of a 4-byte sequence: 11110xxx
    n4_value  = 0b1111'0000,
    n4_mask   = 0b1111'1000,
};
| bool eq(char c, u_char v, u_char m) | |
| { | |
| return v == (c & m); | |
| } | |
| bool is_char_part(char c) | |
| { | |
| return eq(c, char_part, part_mask); | |
| } | |
| int get_size(char c) | |
| { | |
| if (is_char_part(c)) return -1; | |
| if (eq(c, n1_value, n1_mask)) return 1; | |
| if (eq(c, n2_value, n2_mask)) return 2; | |
| if (eq(c, n3_value, n3_mask)) return 3; | |
| if (eq(c, n4_value, n4_mask)) return 4; | |
| return -2; | |
| } | |
| int main() | |
| { | |
| using namespace std::literals; | |
| std::string str = u8"абв"; | |
| for (size_t i = 0; i < str.size();) | |
| { | |
| char c = str[i]; | |
| int n = get_size(c); | |
| if (n <= 0) | |
| { | |
| std::cout << "<?>" << std::endl; | |
| ++i; | |
| } | |
| else | |
| { | |
| std::cout << str.substr(i, n) << std::endl; | |
| i += n; | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment