Last active
August 23, 2024 10:25
-
-
Save tianyuf/1b5fe2e9501825bccf65962cbb049ffc to your computer and use it in GitHub Desktop.
To represent Chinese characters in the machine-readable zone of Chinese passports and other border-crossing documents, the Chinese government uses an algorithm to convert Chinese words into MRZ-compliant characters. The conversion chart is found on the back cover of GA/T 704.14-2015, "Information codes for entry-exit administration. Part 14: Ser…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
To represent Chinese characters in the machine-readable zone of Chinese passports
and other border-crossing documents, the Chinese government uses an algorithm to
convert Chinese words into MRZ-compliant characters.
The conversion chart is found on the back cover of GA/T 704.14-2015, "Information
codes for entry-exit administration. Part 14: Serial labels of machine-readable
codes on travel permit for residents of Hong Kong and Macao to visit the mainland
of China."
"""
def main():
    """Prompt for the text to convert and hand it to ``encode``."""
    # input() already returns a str, so no extra conversion is needed.
    text = input("Insert characters to be converted: ")
    encode(text)
def encode(user_input):
    """Print and return the GA code for every character of *user_input*.

    For each character, one line with the character and its GA code is
    printed; after the last character the concatenated GA code of the whole
    input is printed on its own line.

    Args:
        user_input: The text to convert, processed one character at a time.

    Returns:
        The concatenated GA code of all characters (an empty string for
        empty input).
    """
    ga_codes = []
    for char in user_input:
        # Convert the character into its GBK internal code (hex digits).
        internal_code = get_internal_code(char)
        # Based on the GA/T 704.14-2015 chart, convert the internal code
        # into GA (presumably "gong an", i.e. public security) code.
        ga_code = gbk_to_ga(internal_code)
        ga_codes.append(ga_code)
        print(char, ga_code)
    # Join once at the end instead of growing a string inside the loop.
    full_ga_code = ''.join(ga_codes)
    print(full_ga_code)
    return full_ga_code
def get_internal_code(char):
    """Return the GBK internal code of *char* as uppercase hex digits.

    The character is encoded with the ``gbk`` codec and every resulting byte
    is rendered as two hexadecimal digits, so a double-byte character yields
    four digits (e.g. ``'D6D0'``) and a single-byte character yields two.

    The bytes are converted directly rather than by parsing ``repr(bytes)``:
    the repr shows printable bytes as literal characters (``b'\\x81@'``), so
    splitting it on ``\\x`` fails for any character whose GBK trail byte lies
    in the printable ASCII range.

    Args:
        char: The character to look up.

    Returns:
        The uppercase hexadecimal representation of the GBK-encoded bytes.

    Raises:
        UnicodeEncodeError: If *char* cannot be encoded with the gbk codec.
    """
    return char.encode('gbk').hex().upper()
def gbk_to_ga(internal_code):
    """Translate a hexadecimal GBK internal code into its GA code.

    Each hex digit ``0``-``9``, ``A``-``F`` is replaced by the letter at the
    same position in ``A``-``P`` (``0`` -> ``A``, ``1`` -> ``B``, ...,
    ``F`` -> ``P``).

    Args:
        internal_code: String of uppercase hexadecimal digits.

    Returns:
        The GA code, one letter per recognised hex digit. Characters that
        are not uppercase hex digits (including lowercase ``a``-``f``) are
        skipped and contribute nothing to the result.
    """
    # One-to-one chart: hex digit -> GA letter, in order.
    hex_to_ga = dict(zip('0123456789ABCDEF', 'ABCDEFGHIJKLMNOP'))
    return ''.join(
        hex_to_ga[digit] for digit in internal_code if digit in hex_to_ga
    )
# Run the interactive converter only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment