Created
November 1, 2018 05:26
-
-
Save kzinmr/db8cb4b1babc6da06edb4f6e5c95f876 to your computer and use it in GitHub Desktop.
日付の構造化
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import unicodedata | |
from transduce_jpnumber import kansuji_to_arabic_numeral | |
# Pattern for a (possibly partial) date written as year / month / day.
# Every one of the three parts is optional, so this pattern also matches the
# empty string; callers must inspect the named groups rather than rely on
# ``match()`` returning ``None``.
# Raw strings are used so that ``\d``, ``\-`` and ``\s`` reach the regex
# engine untouched instead of being treated as (invalid) string escapes.
_date_re = re.compile(
    # Year: exactly four digits, then '-', '/' or '年', then an optional
    # suffix made of anything that is not a digit, separator or whitespace.
    r'((?P<year>\d{4})[-/年](?P<ysuf>[^0-9\-/年月日\s]+)?)?'
    # Month: one or two digits, then '-', '/' or '月', then an optional suffix.
    r'((?P<month>\d{1,2})[-/月](?P<msuf>[^0-9\-/年月日\s]+)?)?'
    # Day: one or two digits, an optional '日', then an optional suffix.
    r'((?P<date>\d{1,2})[日]?(?P<dsuf>[^0-9\-/年月日\s]+)?)?'
)
def transduce_jpdate(datestr):
    """Split a Japanese-style date string into its structured components.

    The input is first passed through ``kansuji_to_arabic_numeral``, then
    NFKC-normalized, stripped of surrounding whitespace, and finally matched
    from the start of the string against ``_date_re``.

    Args:
        datestr: The date text to parse, e.g. ``'2018年3月上旬'``.

    Returns:
        A dict with the keys ``'year'``, ``'month'``, ``'date'``,
        ``'year_suffix'``, ``'month_suffix'`` and ``'date_suffix'``. Each
        value is the matched substring, or ``None`` when that part is absent.
        An empty dict is returned if the pattern does not match at all.

    Note:
        Every part of ``_date_re`` as defined in this file is optional, so it
        also matches the empty string. Unparseable input therefore yields a
        dict whose values are all ``None`` rather than the empty dict.
    """
    # NOTE(review): kanji-numeral conversion runs before NFKC normalization;
    # whether the helper copes with full-width digits is not visible here.
    normalized = unicodedata.normalize(
        'NFKC', kansuji_to_arabic_numeral(datestr)
    )
    matched = _date_re.match(normalized.strip())
    if matched is None:
        return {}

    # Output key -> named group in ``_date_re``.
    key_to_group = (
        ('year', 'year'),
        ('month', 'month'),
        ('date', 'date'),
        ('year_suffix', 'ysuf'),
        ('month_suffix', 'msuf'),
        ('date_suffix', 'dsuf'),
    )
    return {key: matched.group(group) for key, group in key_to_group}
# Example invocation; it runs whenever this module is executed or imported,
# and its return value is discarded.
# Presumably yields year '2018', month '3' and month_suffix '上旬', with the
# remaining fields None — confirm against kansuji_to_arabic_numeral's output.
transduce_jpdate('2018年3月上旬')
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.