Skip to content

Instantly share code, notes, and snippets.

@lynn
Last active February 21, 2024 11:47
Show Gist options
  • Save lynn/07f7ce3c314223d2aca19ec2bb0540cd to your computer and use it in GitHub Desktop.
Save lynn/07f7ce3c314223d2aca19ec2bb0540cd to your computer and use it in GitHub Desktop.
Inject rules into the rikaichan/rikaikun `deinflect.dat` file
# usage: python patch-deinflect.py
# (reads deinflect.dat in same folder, writes to new-deinflect.dat)
import fileinput
# Bitmask values
# The CONJ_* flags all fit in the low byte of a rule mask. The script at the
# bottom of this file clears that byte (mask & ~0xFF) on each copied -te rule
# and then sets exactly one CONJ_* flag.
# NOTE(review): presumably each flag names the conjugation class of the
# inflected form a rule matches -- confirm against the rikaichan format.
CONJ_RU_VERB = 0x0001
CONJ_U_VERB = 0x0002
CONJ_I_ADJ = 0x0004
CONJ_KURU = 0x0008
CONJ_SURU = 0x0010
CONJ_OTHER = 0x0080
# Each BASE_* flag is the same-named CONJ_* value shifted left by 8 bits.
# None of them is referenced by the code visible in this file.
BASE_RU_VERB = 0x0100
BASE_U_VERB = 0x0200
BASE_I_ADJ = 0x0400
BASE_KURU = 0x0800
BASE_SURU = 0x1000
class Rule:
    """A rule in a rikaichan deinflect rules file.

    A rule corresponds to one four-field, tab-separated line of the file.

    Attributes:
        conj: First field of the line (the conjugated ending), kept as text.
        base: Second field of the line (the base ending), kept as text.
        mask: Third field, converted to ``int``.
        form_index: Fourth field, converted to ``int``; ``Deinflect`` uses it
            as a position in its ``forms`` list.
    """

    def __init__(self, conj, base, mask, form_index):
        """Store the four fields of a rule line.

        ``mask`` and ``form_index`` may be given as ints or as decimal
        strings (as read from the file); both are normalised to ``int``.

        Raises:
            ValueError: if ``mask`` or ``form_index`` is not a valid integer.
        """
        self.conj = conj
        self.base = base
        self.mask = int(mask)
        self.form_index = int(form_index)

    def __repr__(self):
        """Return a constructor-like representation, useful when debugging."""
        return '{}({!r}, {!r}, {!r}, {!r})'.format(
            type(self).__name__,
            self.conj,
            self.base,
            self.mask,
            self.form_index,
        )

    def order(self):
        """Return a value that acts as a key for ordering rules.

        Rules with a longer ``conj`` sort first; rules of equal length are
        ordered by ``conj`` itself.
        """
        return (-len(self.conj), self.conj)

    def build(self):
        """Build the tab-separated line representing this rule."""
        fields = (self.conj, self.base, self.mask, self.form_index)
        return '\t'.join(str(field) for field in fields)
class Deinflect:
    """The contents of a rikaichan deinflect rules file.

    Attributes:
        header: The line containing ``'Deinflect Rules'``, or ``None`` when
            the input had no such line.
        forms: Form names (the single-field lines), in input order.
        rules: ``Rule`` objects built from the four-field lines.
    """

    def __init__(self, line_iter):
        """Parse a deinflect rules file from an iterator of lines.

        Each line is stripped of surrounding whitespace and classified:
        a line containing ``'Deinflect Rules'`` is the header, a line with a
        single tab-separated field is a form name, and a line with four
        fields is a rule. Blank lines are ignored.

        Raises:
            ValueError: if a non-blank line has any other number of fields.
        """
        self.header = None
        self.forms = []
        self.rules = []
        for line in line_iter:
            line = line.strip()
            if not line:
                # A blank line splits into a single empty field and would
                # otherwise be recorded as an empty form name.
                continue
            if 'Deinflect Rules' in line:
                self.header = line
                continue
            fields = line.split('\t')
            if len(fields) == 1:
                self.forms.append(line)
            elif len(fields) == 4:
                self.rules.append(Rule(*fields))
            else:
                raise ValueError('invalid deinflect line: {!r}'.format(line))

    def build(self):
        """Yield lines forming a deinflect rules file.

        The header comes first (omitted when none was parsed, so the output
        never contains ``None``), then the forms in list order, then every
        rule sorted by its ``order()`` key.
        """
        if self.header is not None:
            yield self.header
        for form in self.forms:
            yield form
        for rule in sorted(self.rules, key=lambda rule: rule.order()):
            yield rule.build()

    def form_index(self, form):
        """
        Return the index of a form in the forms list.

        If the given form is new, it is added to the list, and the new index is returned.
        """
        try:
            return self.forms.index(form)
        except ValueError:
            index = len(self.forms)
            self.forms.append(form)
            return index
def main():
    """Read deinflect.dat, add compound rules for -te helpers, write new-deinflect.dat.

    Every existing rule whose form is '-te' is copied once per helper in the
    table below: the helper text is appended to the rule's conjugated ending,
    the base ending is kept, and the rule is filed under the helper's form
    name (added to the forms list if it is new).
    """
    # Close the input as soon as parsing is done instead of leaking the handle.
    with open('deinflect.dat', encoding='utf-8') as source:
        d = Deinflect(source)

    te_index = d.form_index('-te')
    # Snapshot the -te rules now; the loop below appends to d.rules.
    te_rules = [rule for rule in d.rules if rule.form_index == te_index]

    # https://en.wikipedia.org/wiki/Japanese_verb_conjugation#Usage_3
    # Each entry: (text appended after the -te form, form name, CONJ_* bit).
    te_helpers = [
        ('いる', 'progressive', CONJ_RU_VERB),
        ('る', 'progressive', CONJ_RU_VERB),
        ('おる', 'progressive', CONJ_U_VERB),
        ('おく', 'preparatory', CONJ_U_VERB),
        # ある conjugates like a godan (u) verb (あった, あります), so it is
        # tagged CONJ_U_VERB rather than CONJ_RU_VERB.
        ('ある', 'resultant', CONJ_U_VERB),
        ('しまう', '-te shimau', CONJ_U_VERB),
        ('みる', 'try', CONJ_RU_VERB),
        ('いく', 'go', CONJ_U_VERB),
        ('行く', 'go', CONJ_U_VERB),
        ('く', 'go', CONJ_U_VERB),
        ('くる', 'come', CONJ_KURU),
        ('来る', 'come', CONJ_KURU),
        ('ください', 'please do', CONJ_OTHER),
        ('はいけない', 'no good', CONJ_OTHER),
        ('もいい', "it's OK to", CONJ_OTHER),
        ('もよかった', "it was OK to", CONJ_OTHER),
        ('も良い', "it's OK to", CONJ_OTHER),
        ('も良かった', "it was OK to", CONJ_OTHER),
        ('もかまわない', "don't mind if", CONJ_I_ADJ),
        ('も構わない', "don't mind if", CONJ_I_ADJ),
        ('もかまいません', "don't mind if", CONJ_I_ADJ),
        ('も構いません', "don't mind if", CONJ_I_ADJ),
        ('ほしい', "I want you to", CONJ_I_ADJ),
        ('欲しい', "I want you to", CONJ_I_ADJ),
        ('すみません', 'sorry for', CONJ_OTHER),
        ('くれてありがとう', 'thanks for', CONJ_OTHER),
        ('くれる', 'favor to me', CONJ_RU_VERB),
        ('あげる', 'favor to other', CONJ_RU_VERB),
        ('もらう', 'receive favor', CONJ_U_VERB),
        ('いただく', 'receive favor', CONJ_U_VERB),
    ]

    for verb, form, bit in te_helpers:
        fi = d.form_index(form)
        for rule in te_rules:
            # Keep everything above the low byte of the -te rule's mask and
            # replace the low byte with the helper's single CONJ_* bit.
            new_mask = (rule.mask & ~0xFF) | bit
            d.rules.append(Rule(rule.conj + verb, rule.base, new_mask, fi))

    # Written as bytes so the output is '\n'-separated UTF-8 on every
    # platform, with no trailing newline.
    with open('new-deinflect.dat', 'wb') as f:
        f.write('\n'.join(d.build()).encode('utf-8'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment