Skip to content

Instantly share code, notes, and snippets.

@kzinmr
Created November 1, 2018 05:26
Show Gist options
  • Save kzinmr/db8cb4b1babc6da06edb4f6e5c95f876 to your computer and use it in GitHub Desktop.
Save kzinmr/db8cb4b1babc6da06edb4f6e5c95f876 to your computer and use it in GitHub Desktop.
日付の構造化
import re
import unicodedata
from transduce_jpnumber import kansuji_to_arabic_numeral
# Pattern for a (partial) date written as year / month / day, in that order.
# Each of the three components is optional, so the pattern also matches
# inputs such as "3月14日" or the empty string.
#   year  : exactly 4 digits followed by one of "-", "/", "年"
#   month : 1-2 digits followed by one of "-", "/", "月"
#   date  : 1-2 digits optionally followed by "日"
# After each component an optional free-text suffix (ysuf / msuf / dsuf) is
# captured: a run of characters that are not ASCII digits, "-", "/", "年",
# "月", "日" or whitespace (e.g. "上旬").
# Raw strings are used so that \d, \- and \s reach the regex engine without
# triggering Python's invalid-escape-sequence warning.
_date_re = re.compile(
    r'((?P<year>\d{4})[-/年](?P<ysuf>[^0-9\-/年月日\s]+)?)?'
    r'((?P<month>\d{1,2})[-/月](?P<msuf>[^0-9\-/年月日\s]+)?)?'
    r'((?P<date>\d{1,2})[日]?(?P<dsuf>[^0-9\-/年月日\s]+)?)?'
)
def transduce_jpdate(datestr):
    """Break a date string into year / month / day parts and their suffixes.

    The input is passed through ``kansuji_to_arabic_numeral``, NFKC-normalized,
    stripped of surrounding whitespace and then matched against ``_date_re``
    from the start of the string.

    Args:
        datestr: The date text to analyse, e.g. ``'2018年3月上旬'``.

    Returns:
        A dict with the keys ``'year'``, ``'month'``, ``'date'``,
        ``'year_suffix'``, ``'month_suffix'`` and ``'date_suffix'``. Each
        value is the matched substring, or ``None`` when that part did not
        take part in the match. An empty dict is returned when the pattern
        does not match at all.
    """
    normalized = unicodedata.normalize(
        'NFKC', kansuji_to_arabic_numeral(datestr)
    )
    matched = _date_re.match(normalized.strip())

    # NOTE(review): every group in _date_re is optional, so this branch looks
    # unreachable with the pattern defined in this file -- confirm.
    if matched is None:
        return {}

    # Output key -> named group in _date_re, in the order the keys are emitted.
    key_to_group = (
        ('year', 'year'),
        ('month', 'month'),
        ('date', 'date'),
        ('year_suffix', 'ysuf'),
        ('month_suffix', 'msuf'),
        ('date_suffix', 'dsuf'),
    )
    return {key: matched.group(group) for key, group in key_to_group}
# Example invocation on a sample date string; the returned dict is discarded
# here (it would only be echoed in an interactive session).
transduce_jpdate('2018年3月上旬')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment