Last active
November 14, 2020 02:42
-
-
Save pedrominicz/b30d32081ef52b6a9721256e2534b034 to your computer and use it in GitHub Desktop.
Furigana!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from fugashi import Tagger | |
import re | |
import sys | |
# Japanese card creation process
# - Keep a word list on Google Keep
# - Find a phrase for each word and add it to a temporary file
# - Create cards and add them to the `Stage 0` deck
# - Verify cards and move them from `Stage 1` to `Default`
# - Verify cards and move them from `Stage 0` to `Stage 1`
# Load hand-written overrides from special.txt. Each line has the form
# `surface,markup`; `special` maps a token's surface form to the exact markup
# that is written out instead of the automatically generated one.
special = {}
with open('special.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        # Split on the first comma only so the markup itself may contain
        # commas. A line with no comma at all still raises ValueError.
        key, value = line.split(',', 1)
        special[key] = value.strip()
# Parallel tables: the character at index i of `katakana` corresponds to the
# character at index i of `hiragana`. Both strings must have the same length,
# which `str.maketrans` enforces.
katakana = 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヽヾ'
hiragana = 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞ'
# Translation table for `str.translate`; characters outside `katakana`
# (for example the long-vowel mark) pass through unchanged.
katakana_to_hiragana = str.maketrans(katakana, hiragana)
def extract_okurigana(text, kana):
    """Split off the trailing characters that `text` and `kana` share.

    Compares the two strings from the end and stops at the first position
    where they differ, or when the shorter string is exhausted.

    Args:
        text: Surface form of a word.
        kana: Reading of the same word.

    Returns:
        A tuple `(text, kana, okurigana)`: both inputs with the shared suffix
        removed, followed by that shared suffix ('' when there is none).
    """
    limit = min(len(text), len(kana))
    shared = 0
    # Count matching characters from the end in one pass instead of
    # re-slicing both strings for every matched character.
    while shared < limit and text[-1 - shared] == kana[-1 - shared]:
        shared += 1
    if shared == 0:
        # Guard: `s[:-0]` would be the empty string, not the whole string.
        return text, kana, ''
    return text[:-shared], kana[:-shared], text[-shared:]
# Tokenize everything read from stdin and write the annotated text to stdout.
tagger = Tagger()
# Compiled once because the pattern does not change between tokens.
kanji_pattern = re.compile('[一-龥]')

for word in tagger(sys.stdin.read()):
    text = word.surface
    # The reading may be missing; fall back to '' so `translate` still works.
    # In that case the ruby annotation below is emitted with an empty <rt>.
    kana = (word.feature.kana or '').translate(katakana_to_hiragana)

    if text in special:
        # Hand-written markup takes precedence over the generated one.
        sys.stdout.write(special[text])
    elif kanji_pattern.search(text):
        # `search`, not `match`: a token has kanji if ANY character is in the
        # range, not only its first one (e.g. a token starting with kana).
        text, kana, okurigana = extract_okurigana(text, kana)
        sys.stdout.write('<ruby><rb>{}</rb><rt>{}</rt></ruby>{}'.format(text, kana, okurigana))
    else:
        sys.stdout.write(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
お互い,お<ruby><rb>互</rb><rt>たが</rt></ruby>い | |
お客様,お<ruby><rb>客様</rb><rt>きゃくさま</rt></ruby> | |
お引き受け,お<ruby><rb>引</rb><rt>ひ</rt></ruby>き<ruby><rb>受</rb><rt>う</rt></ruby>け | |
お湯,お<ruby><rb>湯</rb><rt>ゆ</rt></ruby> | |
お目にかかっ,お<ruby><rb>目</rb><rt>め</rt></ruby>にかかっ
お知らせ,お<ruby><rb>知</rb><rt>し</rt></ruby>らせ | |
お祝い,お<ruby><rb>祝</rb><rt>いわ</rt></ruby>い | |
お茶,お<ruby><rb>茶</rb><rt>ちゃ</rt></ruby> | |
お菓子,お<ruby><rb>菓子</rb><rt>かし</rt></ruby> | |
お話し,お<ruby><rb>話</rb><rt>はな</rt></ruby>し | |
お返し,お<ruby><rb>返</rb><rt>かえ</rt></ruby>し | |
お金,お<ruby><rb>金</rb><rt>かね</rt></ruby> | |
に関する,に<ruby><rb>関</rb><rt>かん</rt></ruby>する | |
ひと目,ひと<ruby><rb>目</rb><rt>め</rt></ruby> | |
わが国,わが<ruby><rb>国</rb><rt>くに</rt></ruby> | |
わが校,わが<ruby><rb>校</rb><rt>こう</rt></ruby> | |
を通じて,を<ruby><rb>通</rb><rt>つう</rt></ruby>じて | |
付き合う,<ruby><rb>付</rb><rt>つ</rt></ruby>き<ruby><rb>合</rb><rt>あ</rt></ruby>う | |
入り口,<ruby><rb>入</rb><rt>い</rt></ruby>り<ruby><rb>口</rb><rt>ぐち</rt></ruby> | |
受け付け,<ruby><rb>受</rb><rt>う</rt></ruby>け<ruby><rb>付</rb><rt>つ</rt></ruby>け | |
吐き出し,<ruby><rb>吐</rb><rt>は</rt></ruby>き<ruby><rb>出</rb><rt>だ</rt></ruby>し | |
問い合わせ,<ruby><rb>問</rb><rt>と</rt></ruby>い<ruby><rb>合</rb><rt>あ</rt></ruby>わせ | |
女の子,<ruby><rb>女</rb><rt>おんな</rt></ruby>の<ruby><rb>子</rb><rt>こ</rt></ruby> | |
引っ越し,<ruby><rb>引</rb><rt>ひ</rt></ruby>っ<ruby><rb>越</rb><rt>こ</rt></ruby>し | |
役に立つ,<ruby><rb>役</rb><rt>やく</rt></ruby>に<ruby><rb>立</rb><rt>た</rt></ruby>つ | |
忘れ物,<ruby><rb>忘</rb><rt>わす</rt></ruby>れ<ruby><rb>物</rb><rt>もの</rt></ruby> | |
思い出す,<ruby><rb>思</rb><rt>おも</rt></ruby>い<ruby><rb>出</rb><rt>だ</rt></ruby>す | |
指し手,<ruby><rb>指</rb><rt>さ</rt></ruby>し<ruby><rb>手</rb><rt>て</rt></ruby> | |
日本,<ruby><rb>日本</rb><rt>にほん</rt></ruby> | |
日本人,<ruby><rb>日本人</rb><rt>にほんじん</rt></ruby> | |
溶かし込ん,<ruby><rb>溶</rb><rt>と</rt></ruby>かし<ruby><rb>込</rb><rt>こ</rt></ruby>ん | |
灌漑,<ruby><rb>灌漑</rb><rt>かんがい</rt></ruby> | |
生き方,<ruby><rb>生</rb><rt>い</rt></ruby>き<ruby><rb>方</rb><rt>かた</rt></ruby> | |
申し訳,<ruby><rb>申</rb><rt>もう</rt></ruby>し<ruby><rb>訳</rb><rt>わけ</rt></ruby> | |
申し込み,<ruby><rb>申</rb><rt>もう</rt></ruby>し<ruby><rb>込</rb><rt>こ</rt></ruby>み | |
申し込む,<ruby><rb>申</rb><rt>もう</rt></ruby>し<ruby><rb>込</rb><rt>こ</rt></ruby>む | |
申し込め,<ruby><rb>申</rb><rt>もう</rt></ruby>し<ruby><rb>込</rb><rt>こ</rt></ruby>め | |
申し込ん,<ruby><rb>申</rb><rt>もう</rt></ruby>し<ruby><rb>込</rb><rt>こ</rt></ruby>ん | |
男の子,<ruby><rb>男</rb><rt>おとこ</rt></ruby>の<ruby><rb>子</rb><rt>こ</rt></ruby> | |
知り合い,<ruby><rb>知</rb><rt>し</rt></ruby>り<ruby><rb>合</rb><rt>あ</rt></ruby>い | |
知り合う,<ruby><rb>知</rb><rt>し</rt></ruby>り<ruby><rb>合</rb><rt>あ</rt></ruby>う | |
知り合っ,<ruby><rb>知</rb><rt>し</rt></ruby>り<ruby><rb>合</rb><rt>あ</rt></ruby>っ | |
私,<ruby><rb>私</rb><rt>わたし</rt></ruby> | |
立ち寄り,<ruby><rb>立</rb><rt>た</rt></ruby>ち<ruby><rb>寄</rb><rt>よ</rt></ruby>り | |
立ち寄る,<ruby><rb>立</rb><rt>た</rt></ruby>ち<ruby><rb>寄</rb><rt>よ</rt></ruby>る | |
考え直し,<ruby><rb>考</rb><rt>かんが</rt></ruby>え<ruby><rb>直</rb><rt>なお</rt></ruby>し | |
考え直す,<ruby><rb>考</rb><rt>かんが</rt></ruby>え<ruby><rb>直</rb><rt>なお</rt></ruby>す | |
見た目,<ruby><rb>見</rb><rt>み</rt></ruby>た<ruby><rb>目</rb><rt>め</rt></ruby> | |
言い合わ,<ruby><rb>言</rb><rt>い</rt></ruby>い<ruby><rb>合</rb><rt>あ</rt></ruby>わ | |
言い方,<ruby><rb>言</rb><rt>い</rt></ruby>い<ruby><rb>方</rb><rt>かた</rt></ruby> | |
話し合い,<ruby><rb>話</rb><rt>はな</rt></ruby>し<ruby><rb>合</rb><rt>あ</rt></ruby>い | |
話し合う,<ruby><rb>話</rb><rt>はな</rt></ruby>し<ruby><rb>合</rb><rt>あ</rt></ruby>う | |
話し合お,<ruby><rb>話</rb><rt>はな</rt></ruby>し<ruby><rb>合</rb><rt>あ</rt></ruby>お | |
話し合っ,<ruby><rb>話</rb><rt>はな</rt></ruby>し<ruby><rb>合</rb><rt>あ</rt></ruby>っ | |
話し方,<ruby><rb>話</rb><rt>はな</rt></ruby>し<ruby><rb>方</rb><rt>かた</rt></ruby> | |
買い物,<ruby><rb>買</rb><rt>か</rt></ruby>い<ruby><rb>物</rb><rt>もの</rt></ruby> | |
1,<ruby><rb>1</rb><rt>いち</rt></ruby> | |
2,<ruby><rb>2</rb><rt>に</rt></ruby> | |
3,<ruby><rb>3</rb><rt>さん</rt></ruby> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment