Skip to content

Instantly share code, notes, and snippets.

@kzinmr
Last active June 10, 2019 08:45
Show Gist options
  • Save kzinmr/0abe0d898bc6ca33929229c4f91a14f0 to your computer and use it in GitHub Desktop.
Save kzinmr/0abe0d898bc6ca33929229c4f91a14f0 to your computer and use it in GitHub Desktop.
漢数字を数字に直す (revised from https://qiita.com/dosec/items/c6aef40fae6977fd89ab)
import re
# Translation table: each single kanji digit (including the variants
# 壱/弐/参) maps to the corresponding ASCII digit.
tt_ksuji = str.maketrans(
    '一二三四五六七八九〇壱弐参',
    '1234567890123',
)

# One maximal run of unit kanji and digits, i.e. one candidate number.
re_suji = re.compile(r'[十拾百千万億兆\d]+')
# Tokenizer used together with TRANSUNIT: a single small unit or a digit run.
re_kunit = re.compile(r'[十拾百千]|\d+')
# Tokenizer used together with TRANSMANS: a single large unit, or the whole
# stretch of text between two large units.
re_manshin = re.compile(r'[万億兆]|[^万億兆]+')

# Small units and their multipliers (10 .. 1000).
TRANSUNIT = {'十': 10, '拾': 10, '百': 10 ** 2, '千': 10 ** 3}

# Large units and their multipliers (powers of 10,000).
TRANSMANS = {'万': 10 ** 4, '億': 10 ** 8, '兆': 10 ** 12}

# Native Japanese numeral readings mapped to the digit strings '1'..'9'.
TRANSWA = dict(zip(
    ('ひと', 'ふた', 'みっ', 'よっ', 'いつ', 'むっ', 'なな', 'やっ', 'ここの'),
    '123456789',
))
def kansuji_to_arabic_numeral(kstring, sep=False):
    """Convert kanji numerals embedded in a string to Arabic numerals.

    Every kanji digit in the text is first translated one-to-one to an
    ASCII digit via ``tt_ksuji`` (this also affects digits that are part of
    ordinary words).  Each run matched by ``re_suji`` that still contains a
    unit kanji is then evaluated and replaced by its integer value; runs
    consisting of digits only are left as they are.

    Args:
        kstring: Text that may contain kanji numerals.
        sep: If true, converted values are written with "," as thousands
            separator.  Digit-only runs are not reformatted.

    Returns:
        The converted string.
    """
    def _transvalue(sj, re_obj=re_kunit, transdic=TRANSUNIT):
        """Evaluate one numeral by scanning its tokens right to left."""
        unit = 1  # multiplier still waiting for its coefficient
        result = 0
        for piece in reversed(re_obj.findall(sj)):
            if piece in transdic:
                if unit > 1:
                    # The previously seen unit never got a coefficient,
                    # so it counts once.
                    result += unit
                unit = transdic[piece]
            else:
                # A coefficient: plain digits, or (at the large-unit level)
                # a sub-expression that still contains small units.
                val = int(piece) if piece.isdecimal() else _transvalue(piece)
                result += val * unit
                unit = 1
        if unit > 1:
            # The leftmost token was a bare unit, e.g. a lone 千 -> 1000.
            result += unit
        return result

    def _replace(match):
        """Return the Arabic rendering of one matched run."""
        suji = match.group()
        if suji.isdecimal():
            # Already plain digits after translation: keep untouched.
            return suji
        arabic = _transvalue(suji, re_manshin, TRANSMANS)
        return '{:,}'.format(arabic) if sep else str(arabic)

    # Kanji digits -> [0-9], then substitute every numeric run in one pass.
    # A single regex substitution replaces each run exactly where it was
    # matched, so no length-ordered sequence of str.replace calls is needed.
    return re_suji.sub(_replace, kstring.translate(tt_ksuji))
def wasuji_to_arabic_numeral(wstring):
    """Map a native Japanese numeral reading to its digit string.

    Returns the ``TRANSWA`` entry for ``wstring`` when there is one and it
    is a decimal string; otherwise returns ``wstring`` unchanged.
    """
    try:
        digit = TRANSWA[wstring]
    except KeyError:
        return wstring
    if not digit.isdecimal():
        return wstring
    return digit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment