Skip to content

Instantly share code, notes, and snippets.

@maciejczyzewski
Created December 28, 2019 10:22
Show Gist options
  • Save maciejczyzewski/17b872eb5ed80e02e7aa5886aa9c36a7 to your computer and use it in GitHub Desktop.
Save maciejczyzewski/17b872eb5ed80e02e7aa5886aa9c36a7 to your computer and use it in GitHub Desktop.
[python] Gender Recognition by Voice (.wav) Using Harmonic Product Spectrum (frequency estimation method)
from os import environ
from sys import argv
from warnings import filterwarnings
from numpy.fft import rfft
from numpy.random import seed
from numpy import log1p, pi, polymul, mean, argmax
from scipy.signal import decimate, bilinear, lfilter, parzen
from soundfile import read as _read_audio
from copy import copy
# Shared module state: the loaded samples and their sample rate.  Both stay
# None until the __main__ block assigns them; F_preprocess() and F_hz() read
# them from here.
audio = None
fs = None

# Process-wide setup: fixed NumPy RNG seed, every warning silenced, and the
# OMP_NUM_THREADS environment variable forced to "1".
# NOTE(review): OMP_NUM_THREADS is assigned after numpy/scipy have already
# been imported -- confirm the numeric libraries still honour it at this point.
seed(136698)
filterwarnings("ignore")
environ["OMP_NUM_THREADS"] = "1"
def A_tripaloski(fs):
    """Design the digital weighting filter for sample rate ``fs``.

    The analog prototype has four zeros at s = 0, a double real pole at the
    lowest and at the highest corner frequency, and a single real pole at
    each of the two middle ones.  That pole/zero layout resembles an
    A-weighting curve, but the corner frequencies and gain used here are this
    script's own values.  The prototype is converted to a digital filter
    with ``scipy.signal.bilinear``.

    Parameters
    ----------
    fs : sample rate, handed unchanged to ``bilinear``.

    Returns
    -------
    The value returned by ``bilinear``: the (numerator, denominator)
    coefficients of the digital filter, ready to be unpacked into ``lfilter``.
    """
    # Corner frequencies and gain (dB) of the analog prototype.
    f_low, f_mid_a, f_mid_b, f_high = 3.111, 44.111, 555.111, 4444.111
    gain_db = 33.111

    # Angular frequencies of the poles.
    w_low = 2 * pi * f_low
    w_mid_a = 2 * pi * f_mid_a
    w_mid_b = 2 * pi * f_mid_b
    w_high = 2 * pi * f_high

    # K * s^4: four zeros at the origin, scaled by the dB gain.
    numerator = [w_high**2 * 10 ** (gain_db / 20), 0, 0, 0, 0]

    # (s + w_high)^2 (s + w_low)^2 (s + w_mid_b) (s + w_mid_a)
    denominator = polymul(
        [1, 2 * w_high, w_high**2],
        [1, 2 * w_low, w_low**2],
    )
    denominator = polymul(denominator, [1, w_mid_b])
    denominator = polymul(denominator, [1, w_mid_a])

    return bilinear(numerator, denominator, fs)
def F_preprocess(samples=None, rate=None):
    """Keep the middle half of the recording and run it through the filter.

    The first and last 25 % of the samples are discarded and the remainder
    is filtered with the coefficients produced by ``A_tripaloski``.

    Parameters
    ----------
    samples : sequence of audio samples.  Defaults to the module-level
        ``audio`` so the original zero-argument call keeps working.
    rate : sample rate of ``samples``.  Defaults to the module-level ``fs``.

    Returns
    -------
    The filtered middle section, as returned by ``lfilter``.
    """
    if samples is None:
        samples = audio
    if rate is None:
        rate = fs

    total = len(samples)
    trim = int(total * 0.25)
    # Use an explicit end index: ``samples[trim:-trim]`` collapses to the
    # empty slice ``samples[0:0]`` whenever trim == 0 (fewer than 4 samples).
    middle = samples[trim:total - trim]
    return lfilter(*A_tripaloski(rate), middle)
def F_hz(samples=None, rate=None):
    """Estimate the dominant pitch with a Harmonic Product Spectrum.

    The signal is multiplied by a Parzen window, transformed with a real
    FFT, reduced to its magnitude and compressed with ``log1p``.  Copies of
    that spectrum decimated by the factors 2..8 are then added onto it, so a
    bin whose harmonics line up collects the largest total (adding
    log-magnitudes plays the role of the "product").

    Parameters
    ----------
    samples : audio samples, expected to be mono (one value per frame).
        Defaults to the module-level ``audio`` so the original zero-argument
        call keeps working.
    rate : sample rate of ``samples``.  Defaults to the module-level ``fs``.

    Returns
    -------
    The frequency of the strongest bin, ``rate * bin_index / len(samples)``.
    """
    if samples is None:
        samples = audio
    if rate is None:
        rate = fs

    n = len(samples)
    # rfft yields complex values.  Take the magnitude before log1p so the
    # harmonic sum is real; otherwise the accumulation stays complex and
    # argmax has to fall back on NumPy's lexicographic complex ordering.
    spectrum = log1p(abs(rfft(samples * parzen(n, sym=False))))

    hps = spectrum.copy()
    for factor in range(2, 9):
        # Decimating by ``factor`` moves the factor-th harmonic onto the
        # fundamental's bin; only the overlapping prefix can be accumulated.
        squeezed = decimate(spectrum, factor, zero_phase=True)
        hps[:len(squeezed)] += squeezed

    # Convert the winning bin index to a frequency.
    return rate * argmax(hps) / n
if __name__ == "__main__":
    # Fail with a readable message instead of an IndexError traceback when
    # the input path is missing.
    if len(argv) < 2:
        raise SystemExit("usage: " + argv[0] + " <file.wav>")

    audio, fs = _read_audio(argv[1])
    if audio.ndim > 1:
        # Down-mix to mono by averaging every channel (samples are indexed
        # as [frame, channel] here).  For stereo this is the same
        # (ch0 + ch1) / 2 as before; single-channel 2-D data and files with
        # more than two channels are now handled as well.
        audio = mean(audio, axis=1)
    audio = F_preprocess()

    # Decision rule: an estimated pitch below 160 is reported as "M",
    # anything else as "F".
    if F_hz() < 160:
        print("M")  # male
    else:
        print("F")  # female

# Disabled evaluation harness, kept as an inert string literal (it is never
# executed).  It runs classifier.py on every data/*.wav file, takes the
# expected label from the first character after the first underscore in the
# file name, compares it with the first character of the last token the
# classifier printed, and reports the running accuracy and per-file run time.
# NOTE(review): the loop bodies inside the string are not indented, so the
# text would have to be re-indented before it could be run as a script.
"""
import os, time
from glob import glob
files = glob("data/*.wav")
program = "classifier.py"
cache = "output.tmp"
N = 0
okay = 0
for filename in files:
N += 1
true_decision = filename.split("_")[1][0]
T1 = time.time()
os.system(f"python3 {program} {filename} 1> {cache}")
T2 = time.time()
y_decision = open(cache, "r").read().split()[-1][0]
if y_decision == true_decision:
okay += 1
print_str = "\033[92m ;-) \033[m"
else:
print_str = "\033[91m ;-( \033[m"
if true_decision == "F":
true_decision = f"\033[94m{true_decision}\033[m"
print(
f"{str(N).zfill(3)} --> {filename} (true_decision={true_decision}) {print_str} | \
acc = {(okay/N)*100}% | {round(T2-T1,3)}")
print(f"FINAL | acc = {(okay/N)*100}%")
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment