Skip to content

Instantly share code, notes, and snippets.

@fabriziosalmi
Created October 19, 2024 20:53
Show Gist options
  • Save fabriziosalmi/793f479fef1ec6b0dd2a3cddfcbc810f to your computer and use it in GitHub Desktop.
Save fabriziosalmi/793f479fef1ec6b0dd2a3cddfcbc810f to your computer and use it in GitHub Desktop.
Generate 10000 Kick Drums
import numpy as np
import random
from scipy.io.wavfile import write
from scipy.signal import sawtooth, square
from pydub import AudioSegment, effects
import os
import hashlib
import librosa
from librosa.feature import mfcc
import concurrent.futures
import shutil
# --- Output locations --------------------------------------------------------
# Raw renders are written to OUTPUT_DIR, de-duplicated files are moved to
# FINAL_OUTPUT_DIR, and that set is copied once more into EXTENDED_OUTPUT_DIR.
OUTPUT_DIR = "kick_samples"
FINAL_OUTPUT_DIR = "kick_samples_filtered"
EXTENDED_OUTPUT_DIR = "kick_samples_extended"

# All three directories are created at import time (no error if they exist).
for _directory in (OUTPUT_DIR, FINAL_OUTPUT_DIR, EXTENDED_OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)
del _directory

# --- Synthesis constants -----------------------------------------------------
SAMPLE_RATE = 44_100  # samples per second (44.1 kHz)
DURATION = 1.0  # length of every rendered sample, in seconds
BIT_DEPTH = 32  # nominal bit depth
NUM_SAMPLES = 2_000_000  # number of render attempts requested by the entry point
FINAL_NUM_SAMPLES = 20_000  # number of unique samples to collect before stopping

# Maps the MD5 digest of each kept file to a file path; used to skip duplicates.
unique_hashes = dict()

# --- Randomisation pools -----------------------------------------------------
# Oscillator shapes understood by generate_waveform().
WAVEFORM_TYPES = [
    'sine',
    'sawtooth',
    'square',
    'triangle',
    'pulse',
]
# Filter shapes and distortion flavours a sample can be rendered with.
FILTER_TYPES = [
    'lowpass',
    'highpass',
    'bandpass',
]
DISTORTION_TYPES = [
    'soft',
    'hard',
    'bitcrush',
]
def generate_waveform(t, frequency, waveform_type):
    """Return one oscillator waveform evaluated at the time points ``t``.

    Args:
        t: Time values (array-like, as accepted by NumPy arithmetic).
        frequency: Oscillator frequency; ``frequency * t`` counts cycles.
        waveform_type: One of ``'sine'``, ``'sawtooth'``, ``'square'``,
            ``'triangle'`` or ``'pulse'``.

    Returns:
        The waveform samples, one per entry of ``t``.

    Raises:
        ValueError: If ``waveform_type`` is not one of the supported names.
    """
    if waveform_type == 'triangle':
        # Built directly from the cycle count rather than the angular phase.
        cycles = t * frequency
        return 2 * np.abs(2 * (cycles - np.floor(cycles + 0.5))) - 1

    if waveform_type not in ('sine', 'sawtooth', 'square', 'pulse'):
        raise ValueError("Unsupported waveform type")

    # The remaining shapes are all functions of the same angular phase.
    phase = 2 * np.pi * frequency * t
    if waveform_type == 'sine':
        return np.sin(phase)
    if waveform_type == 'sawtooth':
        return sawtooth(phase)
    if waveform_type == 'square':
        return square(phase)
    # 'pulse': the sign of a sine wave.
    return np.sign(np.sin(phase))
def _adsr_envelope(total_samples, attack, decay, sustain_level, release):
    """Build an ADSR amplitude envelope that is exactly ``total_samples`` long."""
    attack_samples = int(attack * SAMPLE_RATE)
    decay_samples = int(decay * SAMPLE_RATE)
    release_samples = int(release * SAMPLE_RATE)
    # Clamp at zero: when attack + decay + release exceed the sample length the
    # sustain stage simply disappears instead of crashing on a negative size.
    sustain_samples = max(
        0, total_samples - attack_samples - decay_samples - release_samples
    )
    envelope = np.concatenate([
        np.linspace(0, 1, attack_samples),               # Attack phase
        np.linspace(1, sustain_level, decay_samples),    # Decay phase
        np.full(sustain_samples, sustain_level),         # Sustain phase
        np.linspace(sustain_level, 0, release_samples),  # Release phase
    ])
    if len(envelope) < total_samples:
        envelope = np.pad(envelope, (0, total_samples - len(envelope)), 'constant')
    # Over-long stages are cut off so the envelope always matches the signal.
    return envelope[:total_samples]


def _apply_distortion(waveform, distortion, bitcrusher_depth):
    """Distort a float waveform; the result stays on a nominal [-1, 1] scale."""
    if distortion == 'hard':
        return np.clip(waveform * 2, -1, 1)  # Hard clipping
    if distortion == 'bitcrush':
        # Quantise to 2**bitcrusher_depth steps per unit amplitude.
        steps = 2 ** bitcrusher_depth
        return np.round(waveform * steps) / steps
    # 'soft' and any unrecognised name both use tanh soft clipping.
    return np.tanh(waveform)


def _fft_filter(waveform, filter_type):
    """Apply a brick-wall FFT filter; unknown ``filter_type`` is a no-op."""
    # fftfreq returns signed bins in cycles/sample. Comparing magnitudes keeps
    # the mask symmetric, so positive and negative frequencies are treated
    # alike and the inverse transform is real up to rounding error.
    bins = np.abs(np.fft.fftfreq(len(waveform)))
    if filter_type == 'lowpass':
        keep = bins <= 0.05
    elif filter_type == 'highpass':
        keep = bins >= 0.05
    elif filter_type == 'bandpass':
        keep = (bins > 0.01) & (bins < 0.1)
    else:
        return waveform
    spectrum = np.fft.fft(waveform)
    spectrum[~keep] = 0
    return np.fft.ifft(spectrum).real


def _to_int32_stereo(waveform):
    """Scale a [-1, 1] float waveform to int32 and duplicate it to two channels."""
    full_scale = np.iinfo(np.int32).max
    # Clip first: filter overshoot beyond +/-1 would otherwise overflow int32.
    pcm = (np.clip(waveform, -1.0, 1.0) * full_scale).astype(np.int32)
    return np.stack([pcm, pcm], axis=-1)


def generate_layered_kick(frequency=50, attack=0.01, decay=0.4, sustain_level=0.3, release=0.2, distortion='soft', reverb_amount=0.2, bitcrusher_depth=8, waveform_type='sine', filter_type='lowpass'):
    """Synthesise one kick-drum sample as a stereo int32 array.

    The signal chain is: oscillator + Gaussian noise -> ADSR envelope ->
    distortion -> FFT filter -> scale to int32 -> duplicate to stereo.

    Fixes relative to the previous implementation:
      * Every distortion mode is now scaled to the int32 range. Previously
        only 'bitcrush' was scaled, so the other modes were truncated to
        zeros by the final integer cast.
      * Output is clipped before the cast, so it can no longer overflow int32.
      * FFT filter masks are symmetric in frequency.
      * ADSR stages longer than the sample no longer raise or mis-size the
        envelope.

    Args:
        frequency: Oscillator frequency passed to ``generate_waveform``.
        attack: Attack time in seconds.
        decay: Decay time in seconds.
        sustain_level: Envelope level held between decay and release.
        release: Release time in seconds.
        distortion: 'soft', 'hard' or 'bitcrush'; anything else behaves
            like 'soft'.
        reverb_amount: Accepted for interface compatibility; not used by
            this function.
        bitcrusher_depth: Bit depth used by the 'bitcrush' distortion.
        waveform_type: Oscillator shape passed to ``generate_waveform``.
        filter_type: 'lowpass', 'highpass' or 'bandpass'; anything else
            skips filtering.

    Returns:
        ``np.int32`` array of shape ``(int(SAMPLE_RATE * DURATION), 2)`` with
        identical left and right channels.
    """
    # Time array for the duration of the sample
    t = np.linspace(0, DURATION, int(SAMPLE_RATE * DURATION), endpoint=False)

    # Base oscillator mixed with a little noise for texture
    waveform = generate_waveform(t, frequency, waveform_type)
    noise = np.random.normal(0, 0.05, len(t))
    waveform = 0.9 * waveform + 0.1 * noise

    # Shape the amplitude over time
    waveform = waveform * _adsr_envelope(len(t), attack, decay, sustain_level, release)

    waveform = _apply_distortion(waveform, distortion, bitcrusher_depth)
    waveform = _fft_filter(waveform, filter_type)
    return _to_int32_stereo(waveform)
def apply_effects(filename):
    """Post-process the WAV file at ``filename`` in place using pydub.

    Steps, in order: normalize, apply an independent random gain to each
    stereo channel, overlay a boosted low-passed copy and an attenuated
    high-passed copy, fade in and out, then export over the same path.

    Args:
        filename: Path of the WAV file to read and overwrite.
    """
    segment = AudioSegment.from_wav(filename)

    # Bring every sample to a consistent level before colouring it.
    segment = effects.normalize(segment)

    # Left gain is drawn first, then right (keeps the random sequence stable).
    left_gain = random.uniform(-3, 3)
    right_gain = random.uniform(-3, 3)
    segment = segment.apply_gain_stereo(left_gain, right_gain)

    # Both filtered copies are taken from the same gain-adjusted signal.
    boosted_lows = segment.low_pass_filter(100).apply_gain(3)
    # NOTE(review): this copy is overlaid (mixed in), not subtracted, so it
    # presumably adds content above the cutoff rather than cutting it - confirm.
    attenuated_highs = segment.high_pass_filter(500).apply_gain(-2)
    segment = segment.overlay(boosted_lows)
    segment = segment.overlay(attenuated_highs)

    # Fade both ends of the sample.
    segment = segment.fade_in(50)
    segment = segment.fade_out(50)

    # Overwrite the input file with the processed audio.
    segment.export(filename, format="wav", bitrate="32k")
def save_sample(sample, filename):
    """Write ``sample`` to ``filename`` as a WAV file, then re-export it via pydub.

    Args:
        sample: Audio data array handed to ``scipy.io.wavfile.write``.
        filename: Destination path; written twice (scipy, then pydub).
    """
    # First pass: scipy writes the array at the module-wide sample rate.
    write(filename, SAMPLE_RATE, sample)

    # Second pass: load the file back with pydub and export it over itself.
    # NOTE(review): the bitrate argument probably has no effect on a plain WAV
    # export - confirm against the pydub documentation.
    reloaded = AudioSegment.from_wav(filename)
    reloaded.export(filename, format="wav", bitrate="32k")
def calculate_similarity(filename1, filename2):
    """Return the Euclidean distance between the MFCCs of two audio files.

    Both files are loaded at ``SAMPLE_RATE``. Smaller values mean the two
    files have more similar MFCC matrices; 0.0 means identical matrices.

    Files of different length yield different numbers of MFCC frames, which
    previously made the subtraction fail. The comparison now covers only the
    frames both files have, so equal-length inputs give the same result as
    before and unequal-length inputs no longer raise.

    Args:
        filename1: Path to the first audio file.
        filename2: Path to the second audio file.

    Returns:
        The Frobenius norm of the difference between the two MFCC matrices
        over their common frames.
    """
    signal_a, rate_a = librosa.load(filename1, sr=SAMPLE_RATE)
    signal_b, rate_b = librosa.load(filename2, sr=SAMPLE_RATE)
    mfcc_a = mfcc(y=signal_a, sr=rate_a)
    mfcc_b = mfcc(y=signal_b, sr=rate_b)

    # Frames run along the last axis; compare only the overlapping part.
    common_frames = min(mfcc_a.shape[-1], mfcc_b.shape[-1])
    difference = mfcc_a[..., :common_frames] - mfcc_b[..., :common_frames]
    return np.linalg.norm(difference)
def _random_kick_params():
    """Draw one random set of keyword arguments for ``generate_layered_kick``."""
    return {
        'frequency': random.uniform(40, 80),               # Frequency in Hz
        'attack': random.uniform(0.005, 0.02),             # Attack time in seconds
        'decay': random.uniform(0.1, 0.5),                 # Decay time in seconds
        'sustain_level': random.uniform(0.2, 0.5),         # Sustain level (0 to 1)
        'release': random.uniform(0.05, 0.3),              # Release time in seconds
        'distortion': random.choice(DISTORTION_TYPES),     # Distortion type
        'reverb_amount': random.uniform(0.1, 0.5),         # Reverb amount
        'bitcrusher_depth': random.randint(6, 12),         # Bitcrusher depth
        'waveform_type': random.choice(WAVEFORM_TYPES),    # Waveform type
        'filter_type': random.choice(FILTER_TYPES),        # Filter type
    }


def generate_samples(num_samples):
    """Render up to ``num_samples`` random kicks and keep the unique ones.

    Each iteration renders a sample into ``OUTPUT_DIR``, post-processes it,
    and hashes the resulting file with MD5. A file whose hash has not been
    seen is moved to ``FINAL_OUTPUT_DIR``; a duplicate is left where it is.
    The loop stops early once ``FINAL_NUM_SAMPLES`` unique files have been
    kept. Afterwards every file in ``FINAL_OUTPUT_DIR`` is copied into
    ``EXTENDED_OUTPUT_DIR``.

    Samples are rendered one at a time. The previous version opened a
    ``ThreadPoolExecutor`` but never submitted work to it, so removing it
    does not change what is produced.

    Args:
        num_samples: Maximum number of render attempts.
    """
    generated_count = 0
    for i in range(num_samples):
        # Randomize parameters for variability in each kick sample
        sample = generate_layered_kick(**_random_kick_params())

        # Render to disk, then post-process the file in place
        filename = os.path.join(OUTPUT_DIR, f"kick_{i + 1}.wav")
        save_sample(sample, filename)
        apply_effects(filename)

        # Hash the finished file to detect byte-identical duplicates
        with open(filename, 'rb') as f:
            file_hash = hashlib.md5(f.read()).hexdigest()
        if file_hash in unique_hashes:
            continue

        generated_count += 1
        final_filename = os.path.join(FINAL_OUTPUT_DIR, f"kick_filtered_{generated_count}.wav")
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises on Windows if the file already exists.
        os.replace(filename, final_filename)
        # Record where the file lives now, not the path it was moved from.
        unique_hashes[file_hash] = final_filename
        print(f"Generated and kept: {final_filename}")

        # Stop once we've collected enough unique samples
        if generated_count >= FINAL_NUM_SAMPLES:
            break

    # Copy final unique samples to an extended directory for additional manual processing if needed
    for name in os.listdir(FINAL_OUTPUT_DIR):
        shutil.copy(os.path.join(FINAL_OUTPUT_DIR, name), EXTENDED_OUTPUT_DIR)
    print(f"Successfully generated {generated_count} unique kick drum samples in '{FINAL_OUTPUT_DIR}' directory.")
    print(f"Extended samples are available in '{EXTENDED_OUTPUT_DIR}' for further processing.")
if __name__ == "__main__":
    # Script entry point: run the whole pipeline with the configured
    # attempt budget, then report completion.
    generate_samples(num_samples=NUM_SAMPLES)
    print("Finished processing kick drum samples.")
@fabriziosalmi
Copy link
Author

This Python script generates unique kick drum samples by synthesizing audio waveforms with various parameters, applying effects, and filtering for uniqueness. Here's a breakdown of the key components:

1. Imports and Setup

  • Libraries: It uses libraries like numpy for math, random for parameter randomness, scipy for audio synthesis, pydub for audio effects, librosa for audio feature extraction, and concurrent.futures for parallel processing.
  • Directories: Creates three directories to manage output files: raw generated samples, filtered unique samples, and extended samples for further processing.
  • Constants: Defines the sampling rate, duration, bit depth, and number of samples to generate.

2. Waveform Generation

  • generate_waveform(): Creates waveforms (sine, sawtooth, square, etc.) based on time and frequency.
  • Noise: Adds random noise to enhance realism.
  • ADSR Envelope: Shapes the audio volume over time with an Attack-Decay-Sustain-Release (ADSR) model.

3. Sound Effects

  • Distortion: Applies soft clipping (tanh), hard clipping, or bitcrushing.
  • Filters: Implements lowpass, highpass, and bandpass filters using FFT to manipulate frequency components.
  • Stereo Conversion: Converts the mono signal to stereo.

4. Saving and Post-Processing

  • save_sample(): Writes synthesized audio to a WAV file and converts it to a specific bitrate.
  • apply_effects(): Uses pydub to:
    • Normalize volume.
    • Add EQ effects like low-pass boost or high-pass cut.
    • Add fade-ins and fade-outs for smoother transitions.

5. Uniqueness and Filtering

  • Hashing: Calculates a hash (MD5) for each sample to ensure no duplicates are saved.
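  • Similarity Check: calculate_similarity() uses librosa to compute MFCCs (audio features) for two files and returns the Euclidean distance between them. This helper is defined but is not called by the generation loop, so uniqueness is decided by the MD5 hash alone.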

6. Parallel Sample Generation

  • Generates samples in a single loop, one at a time (concurrent.futures is imported, but no work is submitted to a thread pool), where:
    • Parameters like frequency, ADSR settings, distortion type, and waveform type are randomized for variability.
    • Filters and effects are applied after the base synthesis.

7. Final Processing

  • Only unique samples are moved to a filtered directory.
  • Copies the filtered samples to an extended directory for further manual processing if needed.

Execution

  • The script generates up to NUM_SAMPLES kick drum samples and stops early once FINAL_NUM_SAMPLES unique outputs have been collected, saving them with enhanced effects in organized directories.

This setup enables the creation of a large dataset of diverse and realistic kick drum samples for music production, sound design, or machine learning applications.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment