Skip to content

Instantly share code, notes, and snippets.

@Dbhardwaj99
Last active August 6, 2024 10:09
Show Gist options
  • Save Dbhardwaj99/fa09ab2a817af545d1694eb092b66442 to your computer and use it in GitHub Desktop.
Save Dbhardwaj99/fa09ab2a817af545d1694eb092b66442 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
import sounddevice as sd
def generate_random_sound(duration=2.0, sample_rate=16000, rng=None):
    """
    Generate uniform white noise as a mono audio signal.

    Parameters:
    duration (float): Duration of the sound in seconds.
    sample_rate (int): Sample rate of the sound.
    rng (numpy.random.Generator, optional): Source of randomness. When
        omitted, NumPy's global random state is used, so the result can
        still be controlled with ``np.random.seed``.

    Returns:
    tuple: ``(sound, sample_rate)``. ``sound`` is a 1-D float32
        numpy.ndarray holding ``int(duration * sample_rate)`` samples with
        values between -1.0 and 1.0; ``sample_rate`` is returned unchanged
        so the pair can be passed straight to playback/plotting helpers.

    Raises:
    ValueError: If ``duration * sample_rate`` is negative.
    """
    num_samples = int(duration * sample_rate)
    if num_samples < 0:
        # Fail with a message that names the offending arguments instead of
        # NumPy's generic "negative dimensions" error.
        raise ValueError(
            "duration and sample_rate must give a non-negative sample count, "
            f"got duration={duration!r}, sample_rate={sample_rate!r}"
        )

    # Both the global module and a Generator expose uniform(low, high, size).
    uniform = np.random.uniform if rng is None else rng.uniform
    sound = uniform(-1.0, 1.0, num_samples).astype(np.float32)
    return sound, sample_rate
def show_spectrogram(sound, sample_rate):
    """
    Plot the spectrogram of an audio signal and show the figure.

    The spectrogram is drawn on the current matplotlib figure using
    1024-sample FFT segments that overlap by 900 samples, with the
    'viridis' colormap and a colorbar whose ticks are formatted in dB.

    Parameters:
    sound (numpy.ndarray): The audio sound.
    sample_rate (int): The sample rate of the audio.
    """
    specgram_options = {
        'NFFT': 1024,
        'Fs': sample_rate,
        'noverlap': 900,
        'cmap': 'viridis',
    }
    plt.specgram(sound, **specgram_options)

    # Decorate the axes in the same order as before: title, x label, y label.
    decorations = (
        (plt.title, 'Spectrogram'),
        (plt.xlabel, 'Time'),
        (plt.ylabel, 'Frequency'),
    )
    for apply_text, text in decorations:
        apply_text(text)

    plt.colorbar(format='%+2.0f dB')
    plt.show()
def apply_frequency_masking(spectrogram, freq_mask_param):
    """
    Apply frequency masking to the spectrogram.

    Up to ``freq_mask_param`` randomly chosen rows (frequency bins) are
    overwritten with the spectrogram's minimum value. Rows are drawn
    independently with replacement, so the same row may be picked more than
    once and fewer than ``freq_mask_param`` distinct rows may end up masked.

    The array is modified IN PLACE and also returned; pass a copy if the
    original must be preserved.

    Parameters:
    spectrogram (numpy.ndarray): 2-D array indexed as [frequency, time].
    freq_mask_param (int): Number of random row draws to mask. Values
        <= 0 leave the spectrogram untouched.

    Returns:
    numpy.ndarray: The masked spectrogram (the same object as the input).
    """
    num_freqs, _num_times = spectrogram.shape

    # An empty spectrogram has nothing to mask; without this guard both
    # randint(0, 0) and np.min of an empty array would raise ValueError.
    if spectrogram.size == 0:
        return spectrogram

    # The minimum is loop-invariant: filling a row with the current minimum
    # cannot lower it, so compute it once instead of on every iteration.
    fill_value = np.min(spectrogram)
    for _ in range(freq_mask_param):
        row = np.random.randint(0, num_freqs)
        spectrogram[row, :] = fill_value
    return spectrogram
def apply_time_masking(spectrogram, time_mask_param):
    """
    Apply time masking to the spectrogram.

    Up to ``time_mask_param`` randomly chosen columns (time frames) are
    overwritten with the spectrogram's minimum value. Columns are drawn
    independently with replacement, so the same column may be picked more
    than once and fewer than ``time_mask_param`` distinct columns may end up
    masked.

    The array is modified IN PLACE and also returned; pass a copy if the
    original must be preserved.

    Parameters:
    spectrogram (numpy.ndarray): 2-D array indexed as [frequency, time].
    time_mask_param (int): Number of random column draws to mask. Values
        <= 0 leave the spectrogram untouched.

    Returns:
    numpy.ndarray: The masked spectrogram (the same object as the input).
    """
    _num_freqs, num_times = spectrogram.shape

    # An empty spectrogram has nothing to mask; without this guard both
    # randint(0, 0) and np.min of an empty array would raise ValueError.
    if spectrogram.size == 0:
        return spectrogram

    # The minimum is loop-invariant: filling a column with the current
    # minimum cannot lower it, so compute it once instead of per iteration.
    fill_value = np.min(spectrogram)
    for _ in range(time_mask_param):
        column = np.random.randint(0, num_times)
        spectrogram[:, column] = fill_value
    return spectrogram
def play_sound(sound, sample_rate):
    """
    Play an audio signal through the sounddevice library and block until
    playback has finished.

    Parameters:
    sound (numpy.ndarray): The audio sound.
    sample_rate (int): The sample rate of the audio.
    """
    sd.play(sound, samplerate=sample_rate)
    # Do not return until the sound has finished playing.
    sd.wait()
def _show_spectrogram_image(image, times, frequencies, title):
    """Draw a precomputed [frequency, time] dB spectrogram on a new figure and show it."""
    plt.figure()
    plt.imshow(image, cmap='viridis', origin='lower', aspect='auto',
               extent=[times.min(), times.max(), frequencies.min(), frequencies.max()])
    plt.title(title)
    plt.xlabel('Time')
    plt.ylabel('Frequency')
    plt.colorbar(format='%+2.0f dB')
    plt.show()


def main():
    """
    Demonstrate frequency and time masking on a random noise signal.

    Generates a random sound, plays it, shows its spectrogram, then shows
    one frequency-masked and one time-masked version of that spectrogram.
    """
    # Generate a random sound
    sound, sample_rate = generate_random_sound()

    # Play the original sound
    print("Playing original sound...")
    play_sound(sound, sample_rate)

    # Display the original spectrogram
    print("Original Spectrogram:")
    plt.figure()
    show_spectrogram(sound, sample_rate)

    # Compute the spectrogram. plt.specgram returns FOUR values in the order
    # (spectrum, freqs, t, image); unpacking three raised ValueError and
    # would also have mislabelled the arrays.
    power, frequencies, times, _ = plt.specgram(
        sound, NFFT=1024, Fs=sample_rate, noverlap=900, cmap='viridis')
    plt.close()  # Close the figure created by specgram

    # The returned spectrum is linear power, while the plots below label the
    # colorbar in dB. Convert so the values match the label; clamp to the
    # smallest positive float so log10 never sees zero.
    spectrogram = 10.0 * np.log10(np.maximum(power, np.finfo(float).tiny))

    # Apply frequency masking and display the result (on a copy, because the
    # masking helpers modify their argument in place).
    print("Frequency Masked Spectrogram:")
    freq_masked_spectrogram = apply_frequency_masking(spectrogram.copy(), freq_mask_param=10)
    _show_spectrogram_image(freq_masked_spectrogram, times, frequencies,
                            'Frequency Masked Spectrogram')

    # Apply time masking and display the result
    print("Time Masked Spectrogram:")
    time_masked_spectrogram = apply_time_masking(spectrogram.copy(), time_mask_param=10)
    _show_spectrogram_image(time_masked_spectrogram, times, frequencies,
                            'Time Masked Spectrogram')


if __name__ == "__main__":
    main()
@Dbhardwaj99
Copy link
Author

Install required libraries

!pip install numpy matplotlib sounddevice scikit-learn opencv-python torch torchaudio ipython pandas librosa

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment