Created
January 16, 2021 13:49
-
-
Save mkvenkit/e68777c98c893544eecea5046caa3fd7 to your computer and use it in GitHub Desktop.
Simple audio training - STFT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def stft(x):
    """Return the magnitude of the short-time Fourier transform of ``x``.

    ``x`` must expose a ``.numpy()`` method (presumably an eager TensorFlow
    tensor holding audio samples -- confirm against the caller).

    The transform is computed by ``scipy.signal.stft`` with ``fs=16000``,
    ``nperseg=255``, ``noverlap=124`` and ``nfft=256``. The frequency and
    time axes it also returns are discarded; only the absolute value of the
    complex transform is kept and handed back as a TensorFlow tensor.
    """
    samples = x.numpy()
    _, _, transform = signal.stft(
        samples,
        fs=16000,
        nperseg=255,
        noverlap=124,
        nfft=256,
    )
    magnitude = np.abs(transform)
    return tf.convert_to_tensor(magnitude)
def get_spectrogram(waveform):
    """Turn an audio waveform into a fixed-size STFT magnitude spectrogram.

    The waveform is cast to ``float32``, truncated to at most 16000 samples,
    right-padded with zeros up to exactly 16000 samples, and passed through
    ``stft`` via ``tf.py_function``.

    Args:
        waveform: Audio samples. Assumed to be a rank-1 tensor, since the
            padding is concatenated along axis 0 -- confirm against caller.

    Returns:
        A ``tf.float32`` tensor whose static shape is set to ``(129, 124)``.
    """
    target_len = 16000

    waveform = tf.cast(waveform, tf.float32)
    # Clips longer than the target would make the padding length negative
    # and cause tf.zeros to raise, so truncate them first. Shorter clips are
    # left untouched by this slice.
    waveform = waveform[:target_len]

    # Padding for files with less than 16000 samples
    zero_padding = tf.zeros([target_len] - tf.shape(waveform), dtype=tf.float32)

    # Concatenate audio with padding so that all audio clips will be of the
    # same length
    equal_length = tf.concat([waveform, zero_padding], 0)

    spectrogram = tf.py_function(func=stft, inp=[equal_length], Tout=tf.float32)
    # tf.py_function does not carry static shape information, so declare the
    # expected output shape explicitly for downstream consumers.
    spectrogram.set_shape((129, 124))
    return spectrogram
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment