-
-
Save r9y9/db6b5484a6a5deca24e81e76cb17e046 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys
from os.path import exists

import librosa
import librosa.display
import numpy as np
from matplotlib import pyplot as plt
from nnmnkwii.datasets import jsut
from nnmnkwii.io import hts
def lab_path_from_wav(wav_path):
    """Return the label-file path that corresponds to *wav_path*.

    Only the final directory component (``wav`` -> ``lab``) and the file
    extension (``.wav`` -> ``.lab``) are rewritten. A plain
    ``str.replace("wav/", "lab/")`` would also rewrite any parent directory
    whose name happens to end in ``wav``, yielding a wrong path.
    """
    wav_dir, wav_name = os.path.split(wav_path)
    parent, leaf = os.path.split(wav_dir)
    stem, ext = os.path.splitext(wav_name)
    if leaf == "wav":
        wav_dir = os.path.join(parent, "lab")
    if ext == ".wav":
        ext = ".lab"
    return os.path.join(wav_dir, stem + ext)


def alignment_bounds(labels, sr):
    """Return ``(begin, end)`` sample indices of the non-silent region.

    ``labels`` is indexed as a sequence of ``(start, end, name)`` entries.
    The first entry must be ``silB`` and the last ``silE``; the region kept
    runs from the end of the first entry to the start of the last one.
    Times are scaled by ``1e-7`` to seconds before multiplying by ``sr``
    (presumably HTS 100 ns units -- confirm against the label files).

    Raises:
        ValueError: if the first/last entries are not ``silB``/``silE``.
            An explicit raise is used because ``assert`` is stripped
            under ``python -O``.
    """
    if labels[0][-1] != "silB":
        raise ValueError("first label must be 'silB', got %r" % (labels[0][-1],))
    if labels[-1][-1] != "silE":
        raise ValueError("last label must be 'silE', got %r" % (labels[-1][-1],))
    begin = int(labels[0][1] * 1e-7 * sr)
    end = int(labels[-1][0] * 1e-7 * sr)
    return begin, end


if __name__ == "__main__":
    # Dataset root: optional first command-line argument, falling back to
    # the path that was previously hard-coded.
    in_dir = sys.argv[1] if len(sys.argv) > 1 else "/home/ryuichi/data/jsut_ver1"

    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The transcription text is not used below; the zip is kept so the
    # number of iterations is unchanged.
    for idx, (_text, wav_path) in enumerate(zip(transcriptions, wav_paths)):
        print(idx, wav_path)
        lab_path = lab_path_from_wav(wav_path)

        x, sr = librosa.load(wav_path, sr=20000)
        # Energy-based trimming, shown for comparison with the alignment.
        y, _ = librosa.effects.trim(x, top_db=30)

        # Check once so the trimming and the plotting decisions always agree.
        has_lab = exists(lab_path)
        if has_lab:
            labels = hts.load(lab_path)
            b, e = alignment_bounds(labels, sr)
            yy = x[b:e]

        # NOTE(review): newer librosa releases replace ``waveplot`` with
        # ``waveshow`` -- confirm against the installed version.
        plt.figure(figsize=(16, 10))
        plt.subplot(3, 1, 1)
        librosa.display.waveplot(x, sr=sr, x_axis=None)
        plt.title(wav_path)
        plt.subplot(3, 1, 2)
        librosa.display.waveplot(y, sr=sr, x_axis=None)
        plt.title("Trimmed by librosa.effects.trim")
        if has_lab:
            plt.subplot(3, 1, 3)
            librosa.display.waveplot(yy, sr=sr, x_axis="time")
            plt.title("Trimmed by HTS-style alignment")
        plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment