-
-
Save tam17aki/0f9fd16cb6022d2015b9358ee7ce01b0 to your computer and use it in GitHub Desktop.
juliusのセグメンテーション結果に基づいて音声をトリミングする
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import os | |
from nnmnkwii.datasets import jsut | |
import librosa | |
import librosa.display | |
from matplotlib import pyplot as plt | |
from nnmnkwii.io import hts | |
from os.path import exists | |
if __name__ == "__main__":
    # Cut the leading and trailing silence from every JSUT utterance, using
    # the boundaries of the first ("silB") and last ("silE") label segments,
    # and write the result to a parallel "wav_trim" directory per subset.
    in_dir = "/home/ryuichi/data/jsut_ver1"
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # Create every output directory up front so the writes below cannot fail
    # on a missing folder. `os.path.join` is used because only `exists` is
    # imported from `os.path` in this file.
    for subset in jsut.available_subsets:
        save_dir = os.path.join(in_dir, subset, "wav_trim")
        os.makedirs(save_dir, exist_ok=True)

    # The transcription text itself is not needed; it is kept in the zip so
    # the iteration length stays tied to both collections.
    for idx, (_text, wav_path) in enumerate(zip(transcriptions, wav_paths)):
        print(idx, wav_path)
        lab_path = wav_path.replace("wav/", "lab/").replace(".wav", ".lab")
        x, sr = librosa.load(wav_path, sr=20000)

        if exists(lab_path):
            labels = hts.load(lab_path)
            # Each label entry is indexed as (start, end, name); the file is
            # expected to open with "silB" and close with "silE".
            assert labels[0][-1] == "silB"
            assert labels[-1][-1] == "silE"
            # Keep the span from the end of the leading silence to the start
            # of the trailing silence. The 1e-7 factor turns label times into
            # seconds (presumably HTS 100 ns units -- confirm against the
            # label files), and `sr` turns seconds into sample indices.
            b = int(labels[0][1] * 1e-7 * sr)
            e = int(labels[-1][0] * 1e-7 * sr)
            yy = x[b:e]
        else:
            # No segmentation available: write the audio through untrimmed.
            yy = x

        out_wav_path = wav_path.replace("wav/", "wav_trim/")
        # NOTE(review): `librosa.output` was removed in librosa 0.8; on newer
        # versions this call needs to be replaced (e.g. with `soundfile.write`).
        librosa.output.write_wav(out_wav_path, yy, sr)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment