Last active
July 3, 2020 17:06
-
-
Save tam17aki/14b54e4014b2ce2cfbe42ec5f3de4904 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WORLD vocoder demo: analyse a sample utterance with pyworld, then
# resynthesize it twice -- once from the raw features and once from the
# dimension-reduced (coded, then decoded) features -- and write each result
# to a WAV file for comparison against the original.
import numpy as np
import pysptk
import pyworld as pw
from scipy.io import wavfile


def _to_int16(signal):
    """Convert a float waveform to int16, saturating at the int16 limits.

    A bare ``astype(np.int16)`` does not saturate: samples outside the
    int16 range wrap around, which is audible as loud clicks. Clipping
    first keeps any overshoot at the maximum/minimum sample value instead.
    """
    limits = np.iinfo(np.int16)
    return np.clip(signal, limits.min, limits.max).astype(np.int16)


# Load the example utterance bundled with pysptk and keep a copy of the
# unmodified audio for listening comparison.
fs, x = wavfile.read(pysptk.util.example_audio_file())
assert fs == 16000  # sanity check on the bundled sample's sampling rate
wavfile.write('./orig.wav', fs, x)

# Convert from short (int16) to float64 before analysis.
x = x.astype(np.float64)

# Feature extraction (fundamental frequency, spectral envelope,
# aperiodicity).
f0, sp, ap = pw.wav2world(x, fs)
fft_size = pw.get_cheaptrick_fft_size(fs)

# Dimensionality reduction of the features -> DCT-based method
# https://www.isca-speech.org/archive/Interspeech_2017/abstracts/0067.html
# Author's note: reducing to 50 dimensions leaves the audio quality
# unchanged compared with the unreduced features.
sp_dim = 50
code_sp = pw.code_spectral_envelope(sp, fs, sp_dim)
code_ap = pw.code_aperiodicity(ap, fs)
# The `dim` of code ap is defined based on the `fs` as follows:
# fs = `16000` : `1`
# fs = `22050` : `2`
# fs = `44100` : `5`
# fs = `48000` : `5`

# Expand the coded features back to the full FFT-size representation.
decode_sp = pw.decode_spectral_envelope(code_sp, fs, fft_size)
decode_ap = pw.decode_aperiodicity(code_ap, fs, fft_size)

# Resynthesis from the original (uncoded) features.
y = pw.synthesize(f0, sp, ap, fs)
y = _to_int16(y)
outfile = 'world_resynthesis.wav'
wavfile.write(outfile, fs, y)

# Resynthesis from the coded-then-decoded features.
y = pw.synthesize(f0, decode_sp, decode_ap, fs)
y = _to_int16(y)
outfile = 'world_resynthesis_coded.wav'
wavfile.write(outfile, fs, y)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment