Last active
October 8, 2023 22:48
-
-
Save Mekajiki/8572429 to your computer and use it in GitHub Desktop.
ひらがなを音声認識アプリケーションJuliusで使われている音素表現(.htkdic)に変換する
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.mekajiki; | |
import com.ibm.icu.text.Transliterator; | |
import java.util.ArrayList; | |
import java.util.List; | |
public class Hiragana2Phoneme { | |
public static String hiragana2Phoneme(String text) { | |
return romaji2Phoneme(hiragana2Romaji(text)); | |
} | |
public static String hiragana2Romaji(String text) { | |
Transliterator transliterator = Transliterator.getInstance("Hiragana-Latin"); | |
return transliterator.transliterate(text); | |
} | |
public static String hiragana2katakana(String text) { | |
Transliterator transliterator = Transliterator.getInstance("Hiragana-Katakana"); | |
return transliterator.transliterate(text); | |
} | |
public static String romaji2Phoneme(String text) { | |
List<String[]> replaceList = new ArrayList<String[]>(); | |
//撥音のN | |
replaceList.add(new String[]{"n'", "N"}); | |
replaceList.add(new String[]{"n([^aiueo])", "N$1"}); | |
//おう→おーの音便 | |
replaceList.add(new String[]{"ou", "o:"}); | |
//長音 | |
replaceList.add(new String[]{"ā", "a:"}); | |
replaceList.add(new String[]{"ī", "i:"}); | |
replaceList.add(new String[]{"ū", "u:"}); | |
replaceList.add(new String[]{"ē", "e:"}); | |
replaceList.add(new String[]{"ō", "o:"}); | |
//同じ母音の連続→長音 | |
String[] vowels = {"a", "i", "u", "e", "o"}; | |
for(String vowel : vowels) { | |
replaceList.add(new String[]{vowel + "{2,}", vowel + ":"}); | |
} | |
//スペース区切り | |
replaceList.add(new String[]{"[aiueoN]:?", "$0 "}); | |
replaceList.add(new String[]{"[^aiueoN]{1,4}", "$0 "}); | |
replaceList.add(new String[]{"\\s+", " "}); | |
//tchを促音とみなす | |
replaceList.add(new String[]{"tch", "q ch"}); | |
//dzは音素として不正 | |
replaceList.add(new String[]{"dz", "z"}); | |
//同じ子音の連続を促音とみなす | |
replaceList.add(new String[]{" ([^aiueo])$1", " q $1"}); | |
for(String[] map : replaceList) { | |
text = text.replaceAll(map[0], map[1]); | |
} | |
return text; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment