Created
February 13, 2025 01:37
-
-
Save tbogdala/d9db7e2a4ce1adb01331d9f0c8de5e99 to your computer and use it in GitHub Desktop.
Quick-and-dirty Kokoro invocation that plays the generated audio in addition to saving it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1️⃣ Install required packages (argparse is part of the standard library)
# pip install kokoro soundfile sounddevice
import sys | |
import argparse | |
from kokoro import KPipeline | |
import soundfile as sf | |
import sounddevice as sd | |
# Language codes (the first letter of each voice name):
#   🇺🇸 'a' => American English, 🇬🇧 'b' => British English
#   🇯🇵 'j' => Japanese: pip install misaki[ja]
#   🇨🇳 'z' => Mandarin Chinese: pip install misaki[zh]
# Voice names pulled from
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
supported_voices = [
    # 'a' voices
    "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica",
    "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
    "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
    "am_michael", "am_onyx", "am_puck", "am_santa",
    # 'b' voices
    "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
    "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
    # 'j' voices
    "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo",
    # 'z' voices
    "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
    "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
]
# 2️⃣ Parse command-line arguments
parser = argparse.ArgumentParser(description='Kokoro TTS Processor')

# --voice only accepts names present in supported_voices; argparse rejects
# anything else before the script does any work.
parser.add_argument(
    '--voice',
    choices=supported_voices,
    default='af_heart',
    type=str,
    help='Voice selection (default: af_heart)',
)

# --speed is parsed as a float and defaults to 1.0.
parser.add_argument(
    '--speed',
    default=1.0,
    type=float,
    help='Speed setting for the generated speech (default: 1.0)',
)

args = parser.parse_args()
# 3️⃣ Read text from standard input (everything up to EOF)
text = sys.stdin.read()

# 4️⃣ Initialize the pipeline for the language of the selected voice.
# Every supported voice name starts with its language code ('a', 'b', 'j'
# or 'z'), so take lang_code from the voice instead of hard-coding 'a';
# otherwise a non-American voice is run through the American English pipeline.
pipeline = KPipeline(lang_code=args.voice[0])

# 5️⃣ Generate audio chunks with the selected voice and speed.
# The text is split into segments on runs of newlines.
generator = pipeline(
    text,
    voice=args.voice,   # Use command-line argument for voice
    speed=args.speed,   # Honour --speed (was hard-coded to 1.0)
    split_pattern=r'\n+',
)
# 6️⃣ Save and play each generated audio segment
# Sample rate (Hz) used both for the saved WAV files and for playback.
SAMPLE_RATE = 24000

# Start at zero so the summary below is valid even when the generator
# yields nothing (e.g. empty stdin); previously that raised NameError.
segment_count = 0
for segment_count, (_, _, audio) in enumerate(generator, start=1):
    # Files stay zero-indexed: output_0.wav, output_1.wav, ...
    filename = f'output_{segment_count - 1}.wav'
    sf.write(filename, audio, SAMPLE_RATE)

    # Play audio using sounddevice (cross-platform)
    sd.play(audio, samplerate=SAMPLE_RATE)
    sd.wait()  # Wait until audio finishes playing

print(f"Processed {segment_count} audio segment(s) successfully")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment