Created
December 28, 2016 02:49
-
-
Save emasaka/34a12b82af198a63e318b4aab308503c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# answer to http://ja.stackoverflow.com/questions/31353/python3-%e3%81%a7%e3%81%ae%e6%ad%a3%e8%a6%8f%e8%a1%a8%e7%8f%be%e3%81%b8%e3%81%ae%e5%a4%89%e6%8f%9b%e3%81%ab%e3%81%a4%e3%81%84%e3%81%a6 | |
import re | |
def _parse_numrange(s): | |
m = re.search(r'\[(\d+)-(\d+)\]', s) | |
return m.group(1), m.group(2) | |
def _join_pos_digit(lst, p): | |
lst2 = [lst[0]] # 処理後のリスト | |
for i in range(1, len(lst)): | |
if len(lst[i]) < p or len(lst[i - 1]) < p: | |
# 比較する桁が存在しない | |
lst2.append(lst[i]) | |
continue | |
# lst中の前の値と当該桁を比較 | |
rst = lst[i][-(p - 1)] if p > 1 else [] | |
rst1 = lst[i - 1][-(p - 1)] if p > 1 else [] | |
if int(lst[i - 1][-p]) + 1 == int(lst[i][-p]) and rst == rst1: | |
# 当該桁が前の値+1で、当該桁以降が同じなら、lst2でまとめる | |
if isinstance(lst2[-1][-p], list): | |
# lst2で当該桁をまとめ済み | |
lst2[-1][-p].append(lst[i][-p]) | |
else: | |
# lst2で当該桁はまだまとめていない | |
lst2[-1][-p] = [lst2[-1][-p], lst[i][-p]] | |
else: | |
# lst2に追加 | |
lst2.append(lst[i]) | |
return lst2 | |
def _check_head(lst): | |
lst2 = [lst[0]] # 処理後のリスト | |
for i in range(1, len(lst)): | |
if len(lst[i]) == len(lst[i - 1]) + 1 and lst[i][1:] == lst[i - 1]: | |
lst2[i - 1] = [lst[i][0], '?', *lst[i - 1]] | |
else: | |
lst2.append(lst[i]) | |
return lst2 | |
def _lst2re(lst): | |
lst2 = [] | |
for x in lst: | |
s = '' | |
for c in x: | |
if isinstance(c, list): | |
if len(c) > 2: | |
s = s + '[' + c[0] + '-' + c[-1] + ']' | |
else: | |
s = s + '[' + c[0] + c[1] + ']' | |
else: | |
s = s + c | |
lst2.append(s) | |
return '|'.join(lst2) | |
def numrange2re(s):
    """Convert a numeric range such as ``'[10-20]'`` into a regex pattern.

    The result is a ``|``-separated alternation of digit patterns (for
    example ``'1[0-9]|20'``). It carries no anchors or grouping of its own.

    Args:
        s: Text containing a range written as ``[N-M]``.

    Returns:
        The pattern string.

    Raises:
        ValueError: If the range start is greater than its end.
    """
    st, ed = _parse_numrange(s)
    lo, hi = int(st), int(ed)
    if lo > hi:
        # An empty range would otherwise crash inside the helpers with an
        # IndexError on an empty list.
        raise ValueError(
            'empty numeric range: start {} is greater than end {}'.format(
                lo, hi))

    # number -> string -> list of digit characters, one entry per number
    lst = [list(str(x)) for x in range(lo, hi + 1)]

    # Collapse runs into character classes, starting from the rightmost
    # digit; the largest number fixes how many positions there are.
    for p in range(1, len(str(hi)) + 1):
        lst = _join_pos_digit(lst, p)

    # Look for a leading token that can be written as optional ("x?").
    lst = _check_head(lst)

    # token lists -> regex string
    return _lst2re(lst)
# Demo: print the generated pattern for a few sample ranges.
# Expected output, in order:
#   '1[0-9]|20'
#   '[1-9]?[0-9]|100'
#   '2[3-9]|[3-8][0-9]|9[0-4]'
for sample in ('[10-20]', '[0-100]', '[23-94]'):
    print(numrange2re(sample))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment