Skip to content

Instantly share code, notes, and snippets.

@mewa
Created January 8, 2017 21:58
Show Gist options
  • Save mewa/63425addaac74aa22b49ecf6df3c4241 to your computer and use it in GitHub Desktop.
Save mewa/63425addaac74aa22b49ecf6df3c4241 to your computer and use it in GitHub Desktop.
Basic voiced speech gender detection
#!/usr/local/bin/python3
import numpy as np
import scipy as sp
from pylab import *
import wave
import os
import struct
def wavData(f):
    """Read 16-bit PCM audio from an open WAV reader and return one channel.

    Args:
        f: An open reader exposing ``getframerate``, ``getnchannels``,
            ``getsampwidth``, ``getnframes`` and ``readframes`` (for
            example the object returned by ``wave.open(path, "rb")``).

    Returns:
        A ``(freq, samples)`` tuple: ``freq`` is the frame rate reported by
        the reader and ``samples`` is a one-dimensional integer NumPy array
        holding the first channel of every complete frame that was read.

    Raises:
        ValueError: If the samples are not 2 bytes wide.
    """
    freq = f.getframerate()
    numChannels = f.getnchannels()
    sampleWidth = f.getsampwidth()
    if sampleWidth != 2:
        raise ValueError(
            "only 16-bit PCM data is supported, got {0}-bit samples".format(
                8 * sampleWidth))

    raw = f.readframes(f.getnframes())

    # Decode what was actually read rather than what the header promised:
    # a truncated file yields fewer bytes, and a trailing partial frame
    # cannot be interpreted, so it is dropped.
    frameSize = sampleWidth * numChannels
    usable = len(raw) - len(raw) % frameSize
    samples = np.frombuffer(raw[:usable], dtype="<i2")

    # Frames are interleaved, so every numChannels-th sample belongs to the
    # first channel. astype() returns a writable copy in NumPy's default
    # integer type instead of a read-only view of the byte buffer.
    return (freq, samples[::numChannels].astype(np.int_))
def process(freq, signal):
    """Return the strongest frequency of ``signal`` in the 80-255 Hz band.

    The magnitude spectrum of the signal is computed and the bin with the
    largest magnitude between 80 Hz and 255 Hz is converted back to Hz.

    Args:
        freq: Sampling rate of ``signal`` in Hz.
        signal: One-dimensional sequence of real-valued samples.

    Returns:
        The frequency, in Hz, of the strongest spectral bin in the band.

    Raises:
        ValueError: If ``signal`` is empty, or if no non-DC spectral bin
            falls inside the band (signal too short or sampling rate too
            low).
    """
    numSamples = len(signal)
    if numSamples == 0:
        raise ValueError("cannot analyse an empty signal")

    # Bin k of the transform corresponds to k * freq / numSamples Hz. The
    # DC bin stays at index 0 so that bin indices and frequencies line up.
    spectrum = np.abs(np.fft.rfft(signal))

    # f = k * freq / numSamples  =>  k = f * numSamples / freq
    lowBin = int(np.floor(80 * numSamples / freq))
    highBin = int(np.ceil(255 * numSamples / freq))
    print(">>> min", lowBin, "\t> max", highBin)

    # Never consider the DC bin, and stay within the computed spectrum.
    first = lowBin if lowBin > 0 else 1
    last = highBin if highBin < len(spectrum) else len(spectrum)
    if first >= last:
        raise ValueError(
            "signal is too short or undersampled to resolve the 80-255 Hz band")

    peakBin = int(np.argmax(spectrum[first:last])) + first
    result = peakBin * freq / numSamples
    print(">>>>>>>>> dominating frequency:", result)
    return result
# Driver: classify every WAV file in ./train and flag results that do not
# match the label letter found in the file path.
print("Start")
for fname in os.listdir("train"):
    # Only *.wav files are analysed; anything else in the directory
    # (including the marker files written below) is skipped.
    if not fname.endswith(".wav"):
        continue

    fname = "train/" + fname
    print('Processing', fname)
    with wave.open(fname, "rb") as f:
        freq, frames = wavData(f)

    # NOTE: the full recording is analysed; no sample cap is applied.
    ret = process(freq, frames)

    # 172.5 Hz is the decision threshold between the two labels.
    # NOTE(review): presumably 'M' = male and 'K' = female — confirm.
    detected = 'M' if ret < 172.5 else 'K'

    # The expected label is taken to be present when its letter occurs
    # anywhere in the path.
    print("detected", detected, detected in fname)
    if detected not in fname:
        # Leave an empty marker file next to the recording whose name
        # records the measured frequency and the (mismatching) label.
        with open(fname + "_" + str(ret) + "_" + detected + ".wrongresult", "w"):
            pass
print("End")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment