Created
October 19, 2024 20:53
-
-
Save fabriziosalmi/793f479fef1ec6b0dd2a3cddfcbc810f to your computer and use it in GitHub Desktop.
Generate 10000 Kick Drums
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random | |
from scipy.io.wavfile import write | |
from scipy.signal import sawtooth, square | |
from pydub import AudioSegment, effects | |
import os | |
import hashlib | |
import librosa | |
from librosa.feature import mfcc | |
import concurrent.futures | |
import shutil | |
# --- Output locations -------------------------------------------------------
# Raw samples land in OUTPUT_DIR, de-duplicated ones are moved to
# FINAL_OUTPUT_DIR, and a copy of those is placed in EXTENDED_OUTPUT_DIR.
OUTPUT_DIR = "kick_samples"
FINAL_OUTPUT_DIR = "kick_samples_filtered"
EXTENDED_OUTPUT_DIR = "kick_samples_extended"

# Create every output directory up front; existing directories are left alone.
for _directory in (OUTPUT_DIR, FINAL_OUTPUT_DIR, EXTENDED_OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)

# --- Sample generation constants --------------------------------------------
SAMPLE_RATE = 44100  # samples per second (44.1 kHz)
DURATION = 1.0  # length of each sample in seconds
BIT_DEPTH = 32  # nominal bit depth
NUM_SAMPLES = 2000000  # upper bound on generation attempts
FINAL_NUM_SAMPLES = 20000  # number of unique samples to keep

# Maps the hash of each kept sample to its file name (used to detect duplicates).
unique_hashes = dict()

# --- Randomisation choices --------------------------------------------------
# Oscillator shapes understood by generate_waveform().
WAVEFORM_TYPES = ['sine', 'sawtooth', 'square', 'triangle', 'pulse']
# Spectral filter and distortion flavours a sample can be given.
FILTER_TYPES = ['lowpass', 'highpass', 'bandpass']
DISTORTION_TYPES = ['soft', 'hard', 'bitcrush']
def generate_waveform(t, frequency, waveform_type):
    """Return a periodic waveform evaluated at the sample times in *t*.

    Args:
        t: Sample times in seconds. Any array-like (list, tuple, ndarray)
            is accepted and converted to a float array.
        frequency: Oscillator frequency in Hz.
        waveform_type: One of 'sine', 'sawtooth', 'square', 'triangle'
            or 'pulse'.

    Returns:
        A numpy array with the same shape as *t*, with values in [-1, 1].

    Raises:
        ValueError: If *waveform_type* is not one of the supported names.
    """
    # Converting up front lets plain Python sequences be used as the time axis.
    t = np.asarray(t, dtype=float)
    phase = 2 * np.pi * frequency * t

    if waveform_type == 'sine':
        return np.sin(phase)
    if waveform_type == 'sawtooth':
        return sawtooth(phase)
    if waveform_type == 'square':
        return square(phase)
    if waveform_type == 'triangle':
        # Fold a centred ramp: starts at -1 for t == 0 and peaks at +1
        # half a period later.
        cycles = t * frequency
        return 2 * np.abs(2 * (cycles - np.floor(cycles + 0.5))) - 1
    if waveform_type == 'pulse':
        # Sign of a sine: a square-like wave that is exactly 0 wherever the
        # sine itself evaluates to 0.
        return np.sign(np.sin(phase))
    raise ValueError(f"Unsupported waveform type: {waveform_type!r}")
def generate_layered_kick(frequency=50, attack=0.01, decay=0.4, sustain_level=0.3, release=0.2, distortion='soft', reverb_amount=0.2, bitcrusher_depth=8, waveform_type='sine', filter_type='lowpass'):
    """Synthesize one kick-drum sample as a stereo int32 array.

    The signal chain is: oscillator + a little Gaussian noise, an ADSR
    amplitude envelope, a distortion stage, a brick-wall FFT filter, and
    finally scaling to the full int32 range.

    Args:
        frequency: Oscillator frequency in Hz.
        attack: Attack time in seconds.
        decay: Decay time in seconds.
        sustain_level: Envelope level held between decay and release.
        release: Release time in seconds.
        distortion: 'soft' (tanh), 'hard' (clip after 2x gain) or 'bitcrush'
            (quantise to 2**bitcrusher_depth steps). Any other value falls
            back to the soft (tanh) curve.
        reverb_amount: Accepted for interface compatibility; this function
            does not use it.
        bitcrusher_depth: Number of bits used by the 'bitcrush' distortion.
        waveform_type: Oscillator shape passed to generate_waveform().
        filter_type: 'lowpass', 'highpass' or 'bandpass'; any other value
            leaves the spectrum untouched.

    Returns:
        An int32 numpy array of shape (int(SAMPLE_RATE * DURATION), 2) whose
        two columns (left/right) are identical.
    """
    num_frames = int(SAMPLE_RATE * DURATION)
    t = np.linspace(0, DURATION, num_frames, endpoint=False)

    # Oscillator mixed with low-level noise for texture.
    tone = generate_waveform(t, frequency, waveform_type)
    noise = np.random.normal(0, 0.05, num_frames)
    waveform = 0.9 * tone + 0.1 * noise

    # --- ADSR envelope ------------------------------------------------------
    # Segment lengths are clamped at zero so that negative times, or an
    # attack + decay + release longer than the buffer, cannot crash
    # np.linspace / np.ones with a negative count.
    attack_samples = max(int(attack * SAMPLE_RATE), 0)
    decay_samples = max(int(decay * SAMPLE_RATE), 0)
    release_samples = max(int(release * SAMPLE_RATE), 0)
    sustain_samples = max(
        num_frames - attack_samples - decay_samples - release_samples, 0
    )
    envelope = np.concatenate([
        np.linspace(0, 1, attack_samples),                # attack
        np.linspace(1, sustain_level, decay_samples),     # decay
        np.full(sustain_samples, sustain_level),          # sustain
        np.linspace(sustain_level, 0, release_samples),   # release
    ])
    # Force the envelope to exactly the buffer length: truncate when the
    # segments overrun it, zero-pad when they fall short.
    envelope = envelope[:num_frames]
    if len(envelope) < num_frames:
        envelope = np.pad(envelope, (0, num_frames - len(envelope)), 'constant')
    waveform = waveform * envelope

    # --- Distortion ---------------------------------------------------------
    # Every branch keeps the signal in normalised float units; conversion to
    # integer PCM happens once, at the end, for all branches alike.
    if distortion == 'hard':
        waveform = np.clip(waveform * 2, -1, 1)
    elif distortion == 'bitcrush':
        steps = 2 ** bitcrusher_depth
        waveform = np.round(waveform * steps) / steps
    else:
        # 'soft' and any unrecognised value share the tanh saturation curve.
        waveform = np.tanh(waveform)

    # --- Brick-wall filter --------------------------------------------------
    # rfft only holds the non-negative frequencies of a real signal, so a
    # mask on it treats positive and negative frequencies symmetrically.
    # Thresholds are in cycles per sample (multiply by SAMPLE_RATE for Hz).
    if filter_type in ('lowpass', 'highpass', 'bandpass'):
        spectrum = np.fft.rfft(waveform)
        freqs = np.fft.rfftfreq(num_frames)
        if filter_type == 'lowpass':
            keep = freqs <= 0.05
        elif filter_type == 'highpass':
            keep = freqs >= 0.05
        else:
            keep = (freqs > 0.01) & (freqs < 0.1)
        spectrum[~keep] = 0
        waveform = np.fft.irfft(spectrum, n=num_frames)

    # --- Convert to integer PCM ---------------------------------------------
    # Brick-wall filtering can overshoot, so clip before scaling to make sure
    # the int32 cast cannot overflow.
    full_scale = np.iinfo(np.int32).max
    pcm = (np.clip(waveform, -1.0, 1.0) * full_scale).astype(np.int32)

    # Duplicate the mono signal into two identical channels.
    return np.stack([pcm, pcm], axis=-1)
def apply_effects(filename):
    """Post-process the WAV file at *filename* in place using pydub.

    The file is loaded, normalised, given a random gain per channel, layered
    with two filtered copies of itself, faded at both ends and written back
    to the same path.
    """
    # Load the file and normalise it for a consistent level.
    segment = effects.normalize(AudioSegment.from_wav(filename))

    # Random gain offsets for the two channels; the first value is drawn
    # before the second.
    first_gain = random.uniform(-3, 3)
    second_gain = random.uniform(-3, 3)
    segment = segment.apply_gain_stereo(first_gain, second_gain)

    # Two filtered copies of the signal: one low-passed at 100 with +3 gain,
    # one high-passed at 500 with -2 gain.
    lows = segment.low_pass_filter(100).apply_gain(3)
    highs = segment.high_pass_filter(500).apply_gain(-2)

    # Layer both copies on top of the original.
    segment = segment.overlay(lows)
    segment = segment.overlay(highs)

    # Short fades at the start and end for smoother transitions.
    segment = segment.fade_in(50)
    segment = segment.fade_out(50)

    # Overwrite the input file with the processed audio.
    # NOTE(review): bitrate presumably has no effect on WAV output; confirm
    # against the pydub export documentation.
    segment.export(filename, format="wav", bitrate="32k")
def save_sample(sample, filename):
    """Write *sample* to *filename* as a WAV file, then re-export it via pydub.

    Args:
        sample: Audio data handed to scipy's WAV writer.
        filename: Destination path; it is written twice (scipy, then pydub).
    """
    # First pass: scipy writes the array at the module-wide sample rate.
    write(filename, SAMPLE_RATE, sample)

    # Second pass: round-trip the file through pydub, overwriting it in place.
    AudioSegment.from_wav(filename).export(filename, format="wav", bitrate="32k")
def calculate_similarity(filename1, filename2):
    """Return the Euclidean distance between the MFCCs of two audio files.

    Both files are loaded at SAMPLE_RATE. A smaller result means the two
    MFCC matrices are closer together.
    """
    features = []
    for path in (filename1, filename2):
        signal, rate = librosa.load(path, sr=SAMPLE_RATE)
        features.append(mfcc(y=signal, sr=rate))

    first_mfcc, second_mfcc = features
    difference = first_mfcc - second_mfcc
    return np.linalg.norm(difference)
def _random_kick_params():
    """Draw one random parameter set, in generate_layered_kick() argument order."""
    return (
        random.uniform(40, 80),           # frequency in Hz
        random.uniform(0.005, 0.02),      # attack time in seconds
        random.uniform(0.1, 0.5),         # decay time in seconds
        random.uniform(0.2, 0.5),         # sustain level (0 to 1)
        random.uniform(0.05, 0.3),        # release time in seconds
        random.choice(DISTORTION_TYPES),  # distortion type
        random.uniform(0.1, 0.5),         # reverb amount
        random.randint(6, 12),            # bitcrusher depth
        random.choice(WAVEFORM_TYPES),    # waveform type
        random.choice(FILTER_TYPES),      # filter type
    )


def generate_samples(num_samples):
    """Generate kick samples until enough unique ones have been collected.

    Up to *num_samples* candidates are synthesised one after another. Each is
    written to OUTPUT_DIR, post-processed, and hashed; a candidate whose hash
    has not been seen before is moved into FINAL_OUTPUT_DIR. Generation stops
    early once FINAL_NUM_SAMPLES unique samples exist. Finally every file in
    FINAL_OUTPUT_DIR is copied into EXTENDED_OUTPUT_DIR.

    Args:
        num_samples: Maximum number of candidates to generate.

    Side effects:
        Writes WAV files to the three output directories, records hashes in
        the module-level ``unique_hashes`` dict, and prints progress.
    """
    generated_count = 0

    # Samples are produced sequentially; no work is handed to a thread pool.
    for i in range(num_samples):
        sample = generate_layered_kick(*_random_kick_params())

        # Write the candidate to the staging directory and post-process it.
        filename = os.path.join(OUTPUT_DIR, f"kick_{i + 1}.wav")
        save_sample(sample, filename)
        apply_effects(filename)

        # MD5 of the file bytes is used purely as a duplicate fingerprint.
        with open(filename, 'rb') as f:
            file_hash = hashlib.md5(f.read()).hexdigest()

        # Keep only first-seen content; a duplicate stays behind in OUTPUT_DIR.
        if file_hash not in unique_hashes:
            unique_hashes[file_hash] = filename
            generated_count += 1
            final_filename = os.path.join(FINAL_OUTPUT_DIR, f"kick_filtered_{generated_count}.wav")
            # os.replace overwrites a leftover file from an earlier run on
            # every platform, whereas os.rename fails on Windows in that case.
            os.replace(filename, final_filename)
            print(f"Generated and kept: {final_filename}")

        # Stop once we've collected enough unique samples.
        if generated_count >= FINAL_NUM_SAMPLES:
            break

    # Copy the final unique samples to the extended directory for optional
    # manual processing.
    for file in os.listdir(FINAL_OUTPUT_DIR):
        shutil.copy(os.path.join(FINAL_OUTPUT_DIR, file), EXTENDED_OUTPUT_DIR)

    print(f"Successfully generated {generated_count} unique kick drum samples in '{FINAL_OUTPUT_DIR}' directory.")
    print(f"Extended samples are available in '{EXTENDED_OUTPUT_DIR}' for further processing.")
# Script entry point: run the whole generation pipeline, then report completion.
if __name__ == "__main__":
    generate_samples(NUM_SAMPLES)
    print("Finished processing kick drum samples.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This Python script generates unique kick drum samples by synthesizing audio waveforms with various parameters, applying effects, and filtering for uniqueness. Here's a breakdown of the key components:
1. Imports and Setup
The script uses numpy for math, random for parameter randomness, scipy for audio synthesis, pydub for audio effects, librosa for audio feature extraction, and concurrent.futures for parallel processing.
2. Waveform Generation
generate_waveform(): Creates waveforms (sine, sawtooth, square, etc.) based on time and frequency.
3. Sound Effects
Distortion: soft clipping (tanh), hard clipping, or bitcrushing.
4. Saving and Post-Processing
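save_sample(): Writes synthesized audio to a WAV file and converts it to a specific bitrate.
apply_effects(): Uses pydub to normalize the audio, apply a random gain to each stereo channel, overlay low-pass and high-pass filtered copies, and add a fade-in and fade-out.
5. Uniqueness and Filtering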
Computes a hash (MD5) for each sample to ensure no duplicates are saved.
Uses librosa to calculate MFCCs (audio features) for comparing audio similarity.
6. Parallel Sample Generation
Uses concurrent.futures to generate samples concurrently, where each sample is created with randomized parameters.
7. Final Processing
Execution
The script generates NUM_SAMPLES kick drum samples and filters them down to FINAL_NUM_SAMPLES unique outputs, saving them with enhanced effects in organized directories.
This setup enables the creation of a large dataset of diverse and realistic kick drum samples for music production, sound design, or machine learning applications.