Last active
December 27, 2015 21:49
-
-
Save hecomi/7395004 to your computer and use it in GitHub Desktop.
リップシンクのための 1st ステップとして wav からの入力であいうえお判別しました。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream> | |
#include <cstdio> | |
#include <string> | |
#include <memory> | |
#include <complex> | |
#include <vector> | |
#include <cmath> | |
#include <sndfile.h> | |
#include <fftw3.h> | |
#include <boost/format.hpp> | |
using std::complex; | |
using std::vector; | |
// LPC 係数の次数 | |
const int LPC_ORDER = 32; | |
// 解析に用いる微小区間 (sec) | |
const double WINDOW_DURATION = 0.04; | |
// FFT する | |
vector<double> fft(const vector<double>& input); | |
// LPC スペクトル包絡を得る | |
vector<double> lpc(const vector<double>& input, int order, double df); | |
// [1 : -1] に正規化 | |
vector<double> normalize(const vector<double>& input); | |
// ハミング窓をかける | |
vector<double> hamming(const vector<double>& input); | |
// デジタルフィルタ | |
vector<double> freqz(const vector<double>& b, const vector<double>& a, double df, int N); | |
int main() | |
{ | |
// 音声ファイル読み込み | |
SF_INFO sinfo; | |
std::shared_ptr<SNDFILE> sf( | |
sf_open("aiueo.wav", SFM_READ, &sinfo), [](SNDFILE* ptr) { sf_close(ptr); }); | |
const int frame = sinfo.frames; | |
std::unique_ptr<short[]> input(new short[frame]); | |
const double sample_rate = sinfo.samplerate; | |
const double dt = 1.0 / sample_rate; | |
sf_read_short(sf.get(), input.get(), frame); | |
// 結果吐き出し用ファイル | |
std::ofstream wav_log("wav.txt"); | |
std::ofstream freq_log("freq.txt"); | |
// 微小音声区間取り出し | |
const int window_size = static_cast<int>(WINDOW_DURATION / dt); | |
const double df = 1.0 / (window_size / sample_rate); | |
for (int n = 0; n < frame / window_size; ++n) { | |
vector<double> data; | |
for (int i = window_size * n; i < window_size * (n + 1) && i < frame; ++i) { | |
data.push_back(input[i]); | |
} | |
auto hamming_result = normalize( hamming(data) ); | |
auto lpc_result = normalize( lpc(hamming_result, LPC_ORDER, df) ); | |
auto fft_result = normalize( fft(hamming_result) ); | |
// 波形をファイルに書き出し | |
// wav_log << "#data" << n << std::endl; | |
// for (int i = 0; i < window_size; ++i) { | |
// wav_log << boost::format("%1%\t%2%\t%3%\t%4%\n") | |
// % (dt * i) % (data[i]) % (hamming_result[i]) % (lpc_result[i]); | |
// } | |
// wav_log << "\n" << std::endl; | |
// 周波数特性をファイルに書き出し | |
freq_log << "#data" << n << std::endl; | |
for (int i = 0; i < window_size; ++i) { | |
freq_log << boost::format("%1%\t%2%\t%3%\n") | |
% (df * i) % (fft_result[i]) % (lpc_result[i]); | |
} | |
freq_log << "\n" << std::endl; | |
} | |
// Gnuplot で結果を確認 | |
std::shared_ptr<FILE> freq_graph( | |
popen("gnuplot -persist", "w"), [](FILE* ptr) { pclose(ptr); }); | |
fprintf(freq_graph.get(), "load 'freq.plt'\n"); | |
// std::shared_ptr<FILE> wav_graph( | |
// popen("gnuplot -persist", "w"), [](FILE* ptr) { pclose(ptr); }); | |
// fprintf(wav_graph.get(), "load 'wav.plt'\n"); | |
// 片付け | |
return 0; | |
} | |
// FFT | |
vector<double> fft(const vector<double>& input) | |
{ | |
const int frame = input.size(); | |
vector<complex<double>> in(frame), out(frame); | |
auto plan = fftw_plan_dft_1d( | |
frame, | |
reinterpret_cast<fftw_complex*>(&in[0]), | |
reinterpret_cast<fftw_complex*>(&out[0]), | |
FFTW_FORWARD, FFTW_ESTIMATE | |
); | |
for (int i = 0; i < frame; ++i) { | |
in[i] = complex<double>(input[i], 0.0); | |
} | |
fftw_execute(plan); | |
vector<double> fft_result(frame); | |
for (int i = 0; i < frame; ++i) { | |
fft_result[i] = abs(out[i]); | |
} | |
return fft_result; | |
} | |
// LPC | |
vector<double> lpc(const vector<double>& input, int order, double df) | |
{ | |
const int N = input.size(); | |
// 自己相関関数 | |
vector<double> r(N); | |
const int lags_num = order + 1; | |
for (int l = 0; l < lags_num; ++l) { | |
r[l] = 0.0; | |
for (int n = 0; n < N - l; ++n) { | |
r[l] += input[n] * input[n + l]; | |
} | |
} | |
// Levinson-Durbin のアルゴリズムで LPC 係数を計算 | |
vector<double> a(order + 1, 0.0), e(order + 1, 0.0); | |
a[0] = e[0] = 1.0; | |
a[1] = - r[1] / r[0]; | |
e[1] = r[0] + r[1] * a[1]; | |
for (int k = 1; k < order; ++k) { | |
double lambda = 0.0; | |
for (int j = 0; j < k + 1; ++j) { | |
lambda -= a[j] * r[k + 1 - j]; | |
} | |
lambda /= e[k]; | |
vector<double> U(k + 2), V(k + 2); | |
U[0] = 1.0; V[0] = 0.0; | |
for (int i = 1; i < k + 1; ++i) { | |
U[i] = a[i]; | |
V[k + 1 - i] = a[i]; | |
} | |
U[k + 1] = 0.0; V[k + 1] = 1.0; | |
for (int i = 0; i < k + 2; ++i) { | |
a[i] = U[i] + lambda * V[i]; | |
} | |
e[k + 1] = e[k] * (1.0 - lambda * lambda); | |
} | |
// LPC 係数から音声信号再現 | |
// vector<double> lpc_result(N, 0.0); | |
// for (int i = 0; i < N; ++i) { | |
// if (i < order) { | |
// lpc_result[i] = input[i]; | |
// } else { | |
// for (int j = 1; j < order; ++j) { | |
// lpc_result[i] -= a[j] * input[i + 1 - j]; | |
// } | |
// } | |
// } | |
// return lpc_result; | |
return freqz(e, a, df, N); | |
} | |
// 正規化 | |
vector<double> normalize(const vector<double>& input) | |
{ | |
// 最大 / 最小値 | |
auto max = abs( *std::max_element(input.begin(), input.end()) ); | |
auto min = abs( *std::min_element(input.begin(), input.end()) ); | |
double factor = std::max(max, min); | |
vector<double> result( input.size() ); | |
std::transform(input.begin(), input.end(), result.begin(), [factor](double x) { | |
return x / factor; | |
}); | |
return result; | |
} | |
// ハミング窓をかける | |
vector<double> hamming(const vector<double>& input) | |
{ | |
const double N = input.size(); | |
vector<double> result(N); | |
for (int i = 1; i < N - 1; ++i) { | |
const double h = 0.54 - 0.46 * cos(2 * M_PI * i / (N - 1)); | |
result[i] = input[i] * h; | |
} | |
result[0] = result[N - 1] = 0; | |
return result; | |
} | |
// デジタルフィルタ | |
vector<double> freqz(const vector<double>& b, const vector<double>& a, double df, int N) | |
{ | |
vector<double> H(N); | |
for (int n = 0; n < N + 1; ++n) { | |
auto z = std::exp(complex<double>(0.0, -2.0 * M_PI * n / N)); | |
complex<double> numerator(0.0, 0.0), denominator(0.0, 0.0); | |
for (int i = 0; i < b.size(); ++i) { | |
numerator += b[b.size() - 1 - i] * pow(z, i); | |
} | |
for (int i = 0; i < a.size(); ++i) { | |
denominator += a[a.size() - 1 - i] * pow(z, i); | |
} | |
H[n] = abs(numerator / denominator); | |
} | |
return H; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
LPC は以下のサイトを参考にしています : http://aidiary.hatenablog.com/entry/20120415/1334458954
スーパーありがとうございます!