Skip to content

Instantly share code, notes, and snippets.

@marethyu
Created December 30, 2020 03:04
Show Gist options
  • Select an option

  • Save marethyu/fd3054cf94d59d6511cdf687431038ec to your computer and use it in GitHub Desktop.

Select an option

Save marethyu/fd3054cf94d59d6511cdf687431038ec to your computer and use it in GitHub Desktop.
Hiragana/Katakana minimal pair creator
# create_pair.py
#
# Requires pydub (pip install pydub); pydub needs ffmpeg or libav to read and write MP3 files.
# Run download.py twice (once with M, once with F) to fetch both the male and female voices into the hiragana-syllables directory.
import os
import sys
from pydub import AudioSegment
# Directory that receives the generated minimal-pair files.
DIR = "pairs"
# Directory the per-syllable recordings are read from.
SYLLABLE_DIR = "hiragana-syllables"
# Voice names; each one is the file-name prefix of that voice's recordings.
MALE = "miyazaki"
FEMALE = "kanako"
# Silent segment (duration=700) appended after each group of repeated syllables.
pause = AudioSegment.silent(duration=700)
def main():
    """Build one minimal-pair MP3 from two syllable recordings.

    Command line: ``create_pair.py (M | F) sound1 sound2``.

    ``sound1`` is played three times followed by a pause, then ``sound2`` is
    played three times followed by a pause.  The result is exported to
    ``<DIR>/<voice>-<sound1>+<sound2>.mp3`` under the current working
    directory.

    Prints the usage line and returns when the argument count is wrong or the
    voice selector is neither ``M`` nor ``F`` (case-insensitive).  Prints an
    error and returns when one of the syllable recordings does not exist.
    """
    usage = 'create_pair.py (M | F) sound1 sound2'
    if len(sys.argv) != 4:
        print(usage)
        return

    # Reject unknown selectors instead of silently treating every value other
    # than 'M' (including a lowercase 'm') as the female voice.
    selector = sys.argv[1].upper()
    if selector not in ('M', 'F'):
        print(usage)
        return
    gender = MALE if selector == 'M' else FEMALE
    sound1, sound2 = sys.argv[2], sys.argv[3]

    # Check the inputs up front so a typo in a syllable name produces a clear
    # message rather than a traceback from the audio loader.
    sources = [
        os.path.join(SYLLABLE_DIR, f'{gender}-sound-{name}.mp3')
        for name in (sound1, sound2)
    ]
    missing = [path for path in sources if not os.path.isfile(path)]
    if missing:
        for path in missing:
            print(f'Missing syllable file: {path}')
        return

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    out_dir = os.path.join(os.getcwd(), DIR)
    os.makedirs(out_dir, exist_ok=True)

    fname = f'{gender}-{sound1}+{sound2}.mp3'
    first = AudioSegment.from_mp3(sources[0])
    second = AudioSegment.from_mp3(sources[1])
    new = first * 3 + pause + second * 3 + pause
    new.export(os.path.join(out_dir, fname), format='mp3')
    print(f'Created {fname}')
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
# download.py
import os
import requests
import sys
# Base URL of the recordings; the voice name and file name are appended to it.
DL_URL = "http://www.yesjapan.com/online/mp3/"
# Voice names; each is used as the URL path segment and the saved-file prefix.
MALE = "miyazaki"
FEMALE = "kanako"
# Directory (under the current working directory) that receives the downloads.
DIR = "hiragana-syllables"
# Romanized syllable names, one row per line; each name selects the remote
# file 'sound-<name>.mp3'.  Order is preserved because it is the download order.
sounds = ' '.join([
    # basic syllables
    'a i u e o',
    'ka ki ku ke ko',
    'sa shi su se so',
    'ta chi tsu te to',
    'na ni nu ne no',
    'ha hi fu he ho',
    'ma mi mu me mo',
    'ya yu yo',
    'ra ri ru re ro',
    'wa wo',
    'n',
    # voiced and p-row syllables
    'ga gi gu ge go',
    'za ji zu ze zo',
    'da de do',
    'ba bi bu be bo',
    'pa pi pu pe po',
    # contracted syllables
    'kya kyu kyo',
    'sha shu sho',
    'cha chu cho',
    'nya nyu nyo',
    'hya hyu hyo',
    'mya myu myo',
    'rya ryu ryo',
    'gya gyu gyo',
    'ja ju jo',
    'bya byu byo',
    'pya pyu pyo',
]).split()
def main():
    """Download every syllable recording for one voice.

    Command line: ``download.py (M | F)``.

    For each entry in ``sounds`` the file ``sound-<name>.mp3`` is requested
    from ``DL_URL + <voice> + '/'`` and saved as ``<voice>-sound-<name>.mp3``
    in ``DIR`` under the current working directory.  A failed download is
    reported and skipped; the remaining syllables are still attempted.

    Prints the usage line and returns when the argument count is wrong or the
    voice selector is neither ``M`` nor ``F`` (case-insensitive).
    """
    usage = 'download.py (M | F)'
    if len(sys.argv) != 2:
        print(usage)
        return

    # Reject unknown selectors instead of silently treating every value other
    # than 'M' (including a lowercase 'm') as the female voice.
    selector = sys.argv[1].upper()
    if selector not in ('M', 'F'):
        print(usage)
        return
    gender = MALE if selector == 'M' else FEMALE

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    new_dir = os.path.join(os.getcwd(), DIR)
    os.makedirs(new_dir, exist_ok=True)

    # A single session lets the requests share one connection to the host.
    with requests.Session() as session:
        for sound in sounds:
            remote_name = 'sound-' + sound + '.mp3'
            fname = gender + '-' + remote_name
            try:
                # Without a timeout a stalled server would hang the run forever.
                r = session.get(DL_URL + gender + '/' + remote_name, timeout=30)
            except requests.RequestException as err:
                # One network failure should not abort the other downloads.
                print(f'Unable to download {sound} sound file ({err})')
                continue
            if r.status_code != 200:
                print(f'Unable to download {sound} sound file')
                continue
            with open(os.path.join(new_dir, fname), 'wb') as f:
                f.write(r.content)
            print(f'Downloaded {fname}')
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment