Last active
December 30, 2019 14:07
-
-
Save mozbugbox/b73f1c10074ed9ba7446993d6b78e55b to your computer and use it in GitHub Desktop.
中文数字转阿拉伯数字 Convert Chinese number count into Arabic integers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# vim:fileencoding=utf-8:sw=4:et | |
# Reference: <https://blog.csdn.net/grllery/article/details/89430363>
def numzh2int(txt):
    """Convert a Chinese numeral string to an Arabic integer.

    Supports everyday digits (一, 二, 三 ...), financial digits (壹, 贰,
    叁 ...), the multipliers 十/百/千 and the group units 万/亿/兆, in both
    simplified and traditional spellings.

    The string is scanned right to left. Two colloquial forms are handled:

    * A single trailing digit written directly after a unit is read as one
      tenth of that unit: "二百五" is 250 and "两万五" is 25000.
    * A leading unit with no digit in front counts as one of that unit:
      "十一" is 11 and "万八千" is 18000.

    Every digit is weighted by the unit written to its right, so purely
    positional strings such as "二〇一九" are summed digit by digit and are
    NOT read as 2019.

    Args:
        txt: String made up solely of the supported numeral characters.

    Returns:
        The integer value of ``txt``; 0 for an empty string.

    Raises:
        KeyError: If ``txt`` contains a character that is neither a
            supported digit nor a supported unit.
    """
    digits = {
        '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
        '零': 0, '壹': 1, '贰': 2, '叁': 3, '肆': 4,
        '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9,
        '貮': 2, '两': 2,
        # Traditional spellings, so that digits are accepted in the same
        # script as the traditional units 萬 / 億 below.
        '貳': 2, '兩': 2, '參': 3, '叄': 3, '陸': 6,
    }
    multipliers = {
        '十': 10, '拾': 10,
        '百': 100, '佰': 100,
        '千': 1000, '仟': 1000,
        '万': 10000, '萬': 10000,
        '亿': 100000000, '億': 100000000,
        '兆': 1000000000000,
    }
    # 万, 亿 and 兆 scale a whole four-digit group ("unit of unit").
    group_units = (10**4, 10**8, 10**12)

    total = 0
    weight = 1  # multiplier applied to the next digit found
    group = 1   # largest group unit seen so far (scanning right to left)

    for pos, char in enumerate(reversed(txt)):
        if char in digits:
            total += digits[char] * weight
            weight = 1
            continue

        unit = multipliers[char]  # KeyError here means unsupported input
        if unit in group_units and unit > group:
            # Entering a bigger group: it becomes the new base weight.
            group = unit
            weight = unit
        else:
            # Smaller unit inside the current group, e.g. 千 within 万,
            # or 万 stacked on 亿 as in "四万亿".
            weight = unit * group

        if pos == 1:
            # Colloquial tail: "二百五" scans as [5, 100, 2], so the lone
            # trailing digit is worth a tenth of the unit before it. When
            # the first character scanned was a unit, total is still 0 and
            # this is a no-op.
            total = total * weight // 10

    # A unit that no digit consumed is an implicit "one", as in "十一".
    if weight > 1:
        total += weight
    return total
def main():
    """Print one check line per built-in conversion sample.

    Each line shows whether ``numzh2int`` produced the expected integer,
    followed by the input text and the computed value formatted with
    thousands separators.
    """
    samples = (
        ("八", 8),
        ("十", 10),
        ("十一", 11),
        ("五十", 50),
        ("五百", 500),
        ("三千", 3_000),
        ("一万", 10_000),
        ("十万", 100_000),
        ("百万", 1_000_000),
        ("三十二", 32),
        ("二百五", 250),
        ("两万五", 25_000),
        ("万八千", 18_000),
        ("一百万", 1_000_000),
        ("三千万", 30_000_000),
        ("四万亿", 4_000_000_000_000),
        ("五百零三", 503),
        ("五百一十", 510),
        ("一百二十三", 123),
        ("一千二百零三", 1_203),
        ("一万一千一百", 11_100),
        ("一万一千一百零一", 11_101),
        ("一万一千一百一十", 11_110),
        ("十万三千六百零九", 103_609),
        ("十万零三千六百零九", 103_609),
        ("一百二十三万四千五百六十七", 1_234_567),
        ("一千一百二十三万四千五百六十七", 11_234_567),
        ("一亿一千一百二十三万四千五百六十七", 111_234_567),
        ("一百零二亿五千零一万一千零三十八", 10_250_011_038),
        ("一万三千零三十二亿五千四百零一万一千零三十八", 1_303_254_011_038),
        ("一兆三千零三十二亿五千四百零一万一千零三十八", 1_303_254_011_038),
        ("一千兆三千零三十二亿五千四百零一万一千零三十八", 1_000_303_254_011_038),
        ("一万兆三千零三十二亿五千四百零一万一千零三十八", 10_000_303_254_011_038),
    )

    # Debugging aid for a single input (uncomment to use):
    # item = "一千兆三千零三十二亿五千四百零一万零一千零三十八"; print(f'{item}, {numzh2int(item):,d}'); return

    for text, expected in samples:
        got = numzh2int(text)
        print(f'{got==expected} {text}, {got:,d}')


# Run the sample table only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment