Created December 28, 2019 10:22
[python] Gender Recognition by Voice (.wav) Using Harmonic Product Spectrum (frequency estimation method)
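The script takes a .wav path as its only argument, trims 25% of the samples from each end, runs the signal through a weighting filter, takes log1p of the FFT of the Parzen-windowed result, and reinforces the fundamental with a Harmonic Product Spectrum: the spectrum is decimated by factors 2 through 8 and the copies are summed, so the bin shared by all harmonics stands out. A fundamental below 160 Hz is reported as "M", otherwise "F". A rough smoke test, assuming the script is saved as classifier.py (the name used by the benchmark harness at the bottom):

# Hypothetical smoke test, not part of the gist: synthesize a harmonic tone
# with a 120 Hz fundamental, write it to disk, and classify it.
import numpy as np
import soundfile as sf

fs = 16000
t = np.arange(2 * fs) / fs  # two seconds of samples
tone = sum(np.sin(2 * np.pi * 120 * h * t) / h for h in range(1, 6))
sf.write("tone_120hz.wav", 0.3 * tone / np.max(np.abs(tone)), fs)
# $ python3 classifier.py tone_120hz.wav   # expected to print "M" (120 Hz < 160 Hz)

The classifier itself: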
from os import environ
from sys import argv
from warnings import filterwarnings
from copy import copy

from numpy import argmax, log1p, mean, pi, polymul
from numpy.fft import rfft
from numpy.random import seed
from scipy.signal import bilinear, decimate, lfilter
from scipy.signal.windows import parzen  # window functions moved to scipy.signal.windows in recent scipy
from soundfile import read as _read_audio

# Globals filled in by the __main__ block below.
audio = fs = None

environ["OMP_NUM_THREADS"] = "1"  # keep the numeric libraries single-threaded
filterwarnings("ignore")
seed(136698)
def A_tripaloski(fs):
    # Analog weighting filter with the same structure as an A-weighting curve
    # (s^4 numerator, double poles at f1/f4, single poles at f2/f3), but with
    # the author's own corner frequencies; A1000 sets the overall gain.
    f1 = round(3.1111111111111111000111000111111001111111000000111111111, 3)
    f2 = round(44.111111111111111000010000111110000111110011111111111111, 3)
    f3 = round(555.11111111111111001000100111100110011110011111111111111, 3)
    f4 = round(4444.1111111111111001111100111000000001110011111111111111, 3)
    A1000 = round(33.111111111111001111100110011111100111000000111111111, 3)
    # Discretize the analog transfer function with the bilinear transform;
    # returns (b, a) coefficients for lfilter.
    return bilinear(
        [(2 * pi * f4)**2 * (10**(A1000 / 20)), 0, 0, 0, 0],
        polymul(
            polymul(
                polymul(
                    [1, 4 * pi * f4, (2 * pi * f4)**2],
                    [1, 4 * pi * f1, (2 * pi * f1)**2],
                ),
                [1, 2 * pi * f3],
            ),
            [1, 2 * pi * f2],
        ),
        fs,
    )
def F_preprocess():
    # Trim 25% of the samples from each end and apply the weighting filter.
    global audio, fs
    preN = int(len(audio) * 0.25)
    return lfilter(*A_tripaloski(fs), audio[preN:-preN])
def F_hz():
    # Harmonic Product Spectrum: the log-compressed spectrum is decimated by
    # factors 2..8 and the copies are summed, which reinforces the bin where
    # the harmonics line up, i.e. the fundamental frequency.
    global audio, fs
    N = len(audio)
    X = log1p(rfft(audio * parzen(N, sym=False)))
    hps = copy(X)
    for h in range(2, 9):
        dec = decimate(X, h, zero_phase=True)
        hps[:len(dec)] += dec
    return fs * argmax(hps) / N
if __name__ == "__main__":
    audio, fs = _read_audio(argv[1])
    if len(audio.shape) > 1:
        # Downmix stereo to mono by averaging the two channels.
        audio = mean([audio[:, 0], audio[:, 1]], axis=0)
    audio = F_preprocess()
    # Decision rule: fundamentals below 160 Hz are classified as male.
    if F_hz() < 160:
        print("M")  # male
    else:
        print("F")  # female
""" | |
import os, time | |
from glob import glob | |
files = glob("data/*.wav") | |
program = "classifier.py" | |
cache = "output.tmp" | |
N = 0 | |
okay = 0 | |
for filename in files: | |
N += 1 | |
true_decision = filename.split("_")[1][0] | |
T1 = time.time() | |
os.system(f"python3 {program} {filename} 1> {cache}") | |
T2 = time.time() | |
y_decision = open(cache, "r").read().split()[-1][0] | |
if y_decision == true_decision: | |
okay += 1 | |
print_str = "\033[92m ;-) \033[m" | |
else: | |
print_str = "\033[91m ;-( \033[m" | |
if true_decision == "F": | |
true_decision = f"\033[94m{true_decision}\033[m" | |
print( | |
f"{str(N).zfill(3)} --> {filename} (true_decision={true_decision}) {print_str} | \ | |
acc = {(okay/N)*100}% | {round(T2-T1,3)}") | |
print(f"FINAL | acc = {(okay/N)*100}%") | |
""" |