Created
February 7, 2017 11:42
-
-
Save bshishov/d45794fb5e50cc5f0df3025398b6cc6e to your computer and use it in GitHub Desktop.
Fundamental frequency estimation using cepsrtal analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import matplotlib.pyplot as plt | |
import wave | |
import numpy as np | |
# Mapping for numpy arrays | |
types = {1: np.int8, 2: np.int16, 4: np.int32} | |
# Open the wave file | |
filename = input("Path to wave file: ") | |
wav = wave.open(filename, mode="r") | |
nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams() | |
# Inititalize a fundamental frequency | |
freqs = [] | |
up = framerate // 80 | |
down = framerate // 270 | |
d = framerate / 270.0 | |
print(up, down, d) | |
# Number of frames per window | |
window_size = 4096 | |
# Create a window function | |
window = np.hamming(window_size) | |
# Ploting initialization | |
plt.ion() | |
fig = plt.figure(figsize=(14, 10)) | |
signal_plot = fig.add_subplot(411, title='Signal') | |
spectrum_plot = fig.add_subplot(412, title='Spectrum') | |
cepstrum_plot = fig.add_subplot(413, title='Cepstrum') | |
ff_plot = fig.add_subplot(414, title='Fundamental frequency') | |
# Iterate over the wave file frames | |
for i in range(nframes // window_size): | |
# Reading n=window_size frames from the wave file | |
content = wav.readframes(window_size) | |
# Converting array of bytes to array of integers according to sampwidth. If stereo only the first channel is picked | |
samples = np.fromstring(content, dtype=types[sampwidth])[0::nchannels] | |
# Applying window function for our samples | |
samples = window * samples | |
# Calculating spectrum of a signal frame as fft with n=window_size | |
spectrum = np.fft.fft(samples, n=window_size) | |
# Calculating cepstrum as ifft(log(abs(spectrum)))) | |
cepstrum = np.fft.ifft(np.log(np.abs(spectrum))).real | |
# Calculating fundamental frequency by finding peak | |
fund_freq = framerate * len(cepstrum) / (np.argmax(cepstrum[down:up]) + d) / len(cepstrum) | |
freqs.append(fund_freq) | |
# Plot signal | |
signal_plot.clear() | |
signal_plot.axis([0, window_size, -16000, 16000]) | |
signal_plot.plot(samples) | |
# Plot spectrum | |
spectrum_plot.clear() | |
spectrum_plot.axis([0, window_size // 2, -80000, 80000]) | |
spectrum_plot.plot(spectrum[:window_size // 2]) | |
# Plot cestrum | |
cepstrum_plot.clear() | |
cepstrum_plot.axis([0, window_size // 2, -0.2, 0.2]) | |
cepstrum_plot.plot(cepstrum[3:window_size // 2]) | |
# Plot fundamental frequency | |
ff_plot.clear() | |
ff_plot.plot(freqs) | |
fig.canvas.draw() | |
fig.canvas.flush_events() | |
# Wait 30s for the next iteration | |
time.sleep(0.5) | |
plt.show(block=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment