fabriziosalmi · October 19, 2024 20:53 · fabriziosalmi · Nov 30, 2024
diff --git a/generate_kickdrums.py b/generate_kickdrums.py
 import numpy as np
 import random
 from scipy.io.wavfile import write
 from scipy.signal import sawtooth, square
 from pydub import AudioSegment, effects
 import os
 import hashlib
 import librosa
 from librosa.feature import mfcc
 import concurrent.futures
 import shutil

 # Working directories: freshly rendered files, the de-duplicated keepers,
 # and a copy of the keepers for further manual processing.
 OUTPUT_DIR = "kick_samples"
 FINAL_OUTPUT_DIR = "kick_samples_filtered"
 EXTENDED_OUTPUT_DIR = "kick_samples_extended"
 for _directory in (OUTPUT_DIR, FINAL_OUTPUT_DIR, EXTENDED_OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)

 # Rendering parameters
 SAMPLE_RATE = 44_100  # samples per second (44.1 kHz)
 DURATION = 1.0  # length of every rendered sample, in seconds
 BIT_DEPTH = 32  # bits per sample
 NUM_SAMPLES = 2_000_000  # upper bound on render attempts
 FINAL_NUM_SAMPLES = 20_000  # number of unique samples to keep

 # Maps the MD5 digest of each kept file to a path; used to reject renders
 # that are byte-identical to one already kept.
 unique_hashes = {}

 # Oscillator shapes a kick can be built from
 WAVEFORM_TYPES = ['sine', 'sawtooth', 'square', 'triangle', 'pulse']

 # Choices for the filter and distortion stages
 FILTER_TYPES = ['lowpass', 'highpass', 'bandpass']
 DISTORTION_TYPES = ['soft', 'hard', 'bitcrush']


 def generate_waveform(t, frequency, waveform_type):
    """Evaluate a periodic oscillator of the requested shape at times ``t``.

    Args:
        t: Array of time points; ``frequency * t`` is the phase in cycles.
        frequency: Oscillator frequency (cycles per unit of ``t``).
        waveform_type: One of 'sine', 'sawtooth', 'square', 'triangle',
            'pulse'.

    Returns:
        Array of the waveform values, one per element of ``t``.

    Raises:
        ValueError: If ``waveform_type`` is not one of the supported names.
    """
    # The triangle is computed directly from the cycle count, so it is
    # handled before the phase-driven oscillators below.
    if waveform_type == 'triangle':
        cycles = t * frequency
        return 2 * np.abs(2 * (cycles - np.floor(cycles + 0.5))) - 1

    # Every remaining shape is a function of the phase angle in radians.
    oscillators = {
        'sine': np.sin,
        'sawtooth': sawtooth,
        'square': square,
        'pulse': lambda phase: np.sign(np.sin(phase)),
    }
    if waveform_type not in oscillators:
        raise ValueError("Unsupported waveform type")
    return oscillators[waveform_type](2 * np.pi * frequency * t)


 def generate_layered_kick(frequency=50, attack=0.01, decay=0.4, sustain_level=0.3, release=0.2, distortion='soft', reverb_amount=0.2, bitcrusher_depth=8, waveform_type='sine', filter_type='lowpass'):
    """Synthesize a one-shot kick drum as stereo 32-bit integer PCM.

    Signal chain: oscillator mixed with a little Gaussian noise -> ADSR
    amplitude envelope -> distortion -> FFT brick-wall filter -> scaling to
    the int32 range -> duplication into two identical channels.

    Args:
        frequency: Oscillator frequency in Hz.
        attack: Attack time in seconds (ramp from 0 to 1).
        decay: Decay time in seconds (ramp from 1 to ``sustain_level``).
        sustain_level: Envelope level held between decay and release.
        release: Release time in seconds (ramp from ``sustain_level`` to 0).
        distortion: 'soft' (tanh), 'hard' (2x gain, then clip to [-1, 1]) or
            'bitcrush' (round to multiples of ``1 / 2 ** bitcrusher_depth``).
            Any other value falls back to 'soft'.
        reverb_amount: Accepted for interface compatibility; the synthesis
            does not use it.
        bitcrusher_depth: Exponent of the quantization grid for 'bitcrush'.
        waveform_type: Oscillator shape, forwarded to ``generate_waveform``.
        filter_type: 'lowpass', 'highpass' or 'bandpass'; any other value
            skips the filter stage.

    Returns:
        ``numpy.ndarray`` of dtype int32 with shape
        ``(int(SAMPLE_RATE * DURATION), 2)``.
    """
    num_frames = int(SAMPLE_RATE * DURATION)
    t = np.linspace(0, DURATION, num_frames, endpoint=False)

    # Base tone with a small amount of noise mixed in for texture.
    waveform = generate_waveform(t, frequency, waveform_type)
    noise = np.random.normal(0, 0.05, num_frames)
    waveform = 0.9 * waveform + 0.1 * noise

    # ADSR envelope. The sustain phase fills whatever time is left; it is
    # clamped at zero so an attack + decay + release longer than DURATION no
    # longer makes np.ones() raise on a negative length.
    attack_samples = int(attack * SAMPLE_RATE)
    decay_samples = int(decay * SAMPLE_RATE)
    release_samples = int(release * SAMPLE_RATE)
    sustain_samples = max(num_frames - attack_samples - decay_samples - release_samples, 0)
    envelope = np.concatenate([
        np.linspace(0, 1, attack_samples),
        np.linspace(1, sustain_level, decay_samples),
        np.full(sustain_samples, sustain_level),
        np.linspace(sustain_level, 0, release_samples),
    ])
    # An over-long envelope is cut to the sample length (the tail of the
    # release is dropped) so it can be multiplied with the waveform.
    waveform = waveform * envelope[:num_frames]

    # Distortion. Every branch leaves the signal on the normalized [-1, 1]
    # scale; conversion to integer PCM happens once, at the end.
    if distortion == 'hard':
        waveform = np.clip(waveform * 2, -1, 1)
    elif distortion == 'bitcrush':
        levels = 2 ** bitcrusher_depth
        waveform = np.round(waveform * levels) / levels
    else:
        # 'soft' and any unrecognized value
        waveform = np.tanh(waveform)

    # Brick-wall filter in the frequency domain. The real-input FFT exposes
    # only non-negative frequencies, so each mask acts on a bin and its
    # mirror image together. Thresholds are in cycles per sample
    # (0.05 is about 2.2 kHz at a 44.1 kHz sample rate).
    if filter_type in ('lowpass', 'highpass', 'bandpass'):
        spectrum = np.fft.rfft(waveform)
        freqs = np.fft.rfftfreq(num_frames)
        if filter_type == 'lowpass':
            keep = freqs <= 0.05
        elif filter_type == 'highpass':
            keep = freqs >= 0.05
        else:
            keep = (freqs > 0.01) & (freqs < 0.1)
        spectrum[~keep] = 0
        waveform = np.fft.irfft(spectrum, n=num_frames)

    # Scale to the int32 range for every distortion type. Casting the
    # normalized float signal directly would truncate almost every value
    # to 0. Clipping first keeps filter overshoot from wrapping around.
    max_val = np.iinfo(np.int32).max
    pcm = np.clip(waveform, -1.0, 1.0) * max_val

    # Duplicate the mono signal into two identical channels.
    stereo_waveform = np.stack([pcm, pcm], axis=-1)
    return stereo_waveform.astype(np.int32)


 def apply_effects(filename):
    """Post-process the WAV file at ``filename`` in place with pydub.

    Steps, in order: normalize, apply a random gain to each channel, overlay
    a boosted low-passed copy and an attenuated high-passed copy, fade in
    and out, then export back to the same path.

    Args:
        filename: Path of the WAV file to read and overwrite.
    """
    segment = effects.normalize(AudioSegment.from_wav(filename))

    # Independent random gain for the left and right channel, each drawn
    # from [-3, 3] (left is drawn first).
    left_gain = random.uniform(-3, 3)
    right_gain = random.uniform(-3, 3)
    segment = segment.apply_gain_stereo(left_gain, right_gain)

    # Both filtered copies are derived from the gain-adjusted audio before
    # either one is overlaid onto it.
    low_layer = segment.low_pass_filter(100).apply_gain(3)
    high_layer = segment.high_pass_filter(500).apply_gain(-2)
    segment = segment.overlay(low_layer)
    segment = segment.overlay(high_layer)

    # Fade both ends of the sample.
    segment = segment.fade_in(50)
    segment = segment.fade_out(50)

    # Overwrite the input file with the processed audio.
    segment.export(filename, format="wav", bitrate="32k")


 def save_sample(sample, filename):
    """Write ``sample`` to ``filename`` as WAV, then re-export it via pydub.

    Args:
        sample: Audio data handed to ``scipy.io.wavfile.write``.
        filename: Destination path; written once by scipy and then
            overwritten by pydub's export of the same audio.
    """
    write(filename, SAMPLE_RATE, sample)
    # Round-trip the file through pydub so the file on disk is the one
    # pydub's WAV exporter produces.
    AudioSegment.from_wav(filename).export(filename, format="wav", bitrate="32k")


 def calculate_similarity(filename1, filename2):
    """Return the Euclidean distance between the MFCCs of two audio files.

    Both files are loaded with librosa at ``SAMPLE_RATE``, so their MFCC
    matrices are computed at the same rate. A smaller result means the two
    files are more alike; 0 means the compared MFCC frames are identical.

    Args:
        filename1: Path of the first audio file.
        filename2: Path of the second audio file.

    Returns:
        The norm of the element-wise MFCC difference, taken over the frames
        that both files have.
    """
    y1, sr1 = librosa.load(filename1, sr=SAMPLE_RATE)
    y2, sr2 = librosa.load(filename2, sr=SAMPLE_RATE)
    mfcc1 = mfcc(y=y1, sr=sr1)
    mfcc2 = mfcc(y=y2, sr=sr2)
    # Files of different duration yield a different number of MFCC frames
    # (last axis); subtracting them directly would raise a shape error.
    # Compare only the leading frames both have. Equal-length inputs are
    # unaffected.
    frames = min(mfcc1.shape[-1], mfcc2.shape[-1])
    return np.linalg.norm(mfcc1[..., :frames] - mfcc2[..., :frames])


 def generate_samples(num_samples):
    """Render randomized kicks until enough unique files have been kept.

    Makes at most ``num_samples`` attempts. Each attempt draws random
    synthesis parameters, renders the kick into ``OUTPUT_DIR``,
    post-processes the file and hashes it with MD5. A file whose digest is
    not yet in ``unique_hashes`` is moved into ``FINAL_OUTPUT_DIR``. The loop
    stops early once ``FINAL_NUM_SAMPLES`` files have been kept. Afterwards
    every file in ``FINAL_OUTPUT_DIR`` is copied into ``EXTENDED_OUTPUT_DIR``.

    Args:
        num_samples: Maximum number of render attempts.
    """
    generated_count = 0
    # The work runs serially; the thread pool that used to wrap this loop
    # was never given any tasks, so it has been dropped.
    for i in range(num_samples):
        # Randomize the synthesis parameters. The draw order is unchanged
        # so runs seeded through the `random` module pick the same values.
        frequency = random.uniform(40, 80)  # Hz
        attack = random.uniform(0.005, 0.02)  # seconds
        decay = random.uniform(0.1, 0.5)  # seconds
        sustain_level = random.uniform(0.2, 0.5)  # 0 to 1
        release = random.uniform(0.05, 0.3)  # seconds
        distortion = random.choice(DISTORTION_TYPES)
        reverb_amount = random.uniform(0.1, 0.5)
        bitcrusher_depth = random.randint(6, 12)
        waveform_type = random.choice(WAVEFORM_TYPES)
        filter_type = random.choice(FILTER_TYPES)

        sample = generate_layered_kick(
            frequency=frequency,
            attack=attack,
            decay=decay,
            sustain_level=sustain_level,
            release=release,
            distortion=distortion,
            reverb_amount=reverb_amount,
            bitcrusher_depth=bitcrusher_depth,
            waveform_type=waveform_type,
            filter_type=filter_type,
        )

        # Stage the render in OUTPUT_DIR, then post-process it in place.
        filename = os.path.join(OUTPUT_DIR, f"kick_{i + 1}.wav")
        save_sample(sample, filename)
        apply_effects(filename)

        # Digest of the finished file, used to detect byte-identical renders.
        with open(filename, 'rb') as f:
            file_hash = hashlib.md5(f.read()).hexdigest()

        # Keep only renders whose digest has not been seen before.
        # NOTE: a render rejected here stays behind in OUTPUT_DIR.
        if file_hash not in unique_hashes:
            generated_count += 1
            final_filename = os.path.join(FINAL_OUTPUT_DIR, f"kick_filtered_{generated_count}.wav")
            # os.replace overwrites a file left over from an earlier run on
            # every platform; os.rename raises on Windows in that case.
            os.replace(filename, final_filename)
            # Record where the file now lives, not the vacated staging path.
            unique_hashes[file_hash] = final_filename
            print(f"Generated and kept: {final_filename}")

        # Stop once enough unique samples have been collected.
        if generated_count >= FINAL_NUM_SAMPLES:
            break

    # Mirror the kept samples into the extended directory.
    for entry in os.listdir(FINAL_OUTPUT_DIR):
        shutil.copy(os.path.join(FINAL_OUTPUT_DIR, entry), EXTENDED_OUTPUT_DIR)

    print(f"Successfully generated {generated_count} unique kick drum samples in '{FINAL_OUTPUT_DIR}' directory.")
    print(f"Extended samples are available in '{EXTENDED_OUTPUT_DIR}' for further processing.")


 if __name__ == "__main__":
    # Script entry point: run the full generation pass, then report completion.
    generate_samples(NUM_SAMPLES)
    print("Finished processing kick drum samples.")
	import numpy as np
	import random
	from scipy.io.wavfile import write
	from scipy.signal import sawtooth, square
	from pydub import AudioSegment, effects
	import os
	import hashlib
	import librosa
	from librosa.feature import mfcc
	import concurrent.futures
	import shutil

	# Working directories: freshly rendered files, the de-duplicated keepers,
	# and a copy of the keepers for further manual processing.
	OUTPUT_DIR = "kick_samples"
	FINAL_OUTPUT_DIR = "kick_samples_filtered"
	EXTENDED_OUTPUT_DIR = "kick_samples_extended"
	for _directory in (OUTPUT_DIR, FINAL_OUTPUT_DIR, EXTENDED_OUTPUT_DIR):
	    os.makedirs(_directory, exist_ok=True)

	# Rendering parameters
	SAMPLE_RATE = 44_100  # samples per second (44.1 kHz)
	DURATION = 1.0  # length of every rendered sample, in seconds
	BIT_DEPTH = 32  # bits per sample
	NUM_SAMPLES = 2_000_000  # upper bound on render attempts
	FINAL_NUM_SAMPLES = 20_000  # number of unique samples to keep

	# Maps the MD5 digest of each kept file to a path; used to reject renders
	# that are byte-identical to one already kept.
	unique_hashes = {}

	# Oscillator shapes a kick can be built from
	WAVEFORM_TYPES = ['sine', 'sawtooth', 'square', 'triangle', 'pulse']

	# Choices for the filter and distortion stages
	FILTER_TYPES = ['lowpass', 'highpass', 'bandpass']
	DISTORTION_TYPES = ['soft', 'hard', 'bitcrush']


	def generate_waveform(t, frequency, waveform_type):
	    """Evaluate a periodic oscillator of the requested shape at times ``t``.

	    Args:
	        t: Array of time points; ``frequency * t`` is the phase in cycles.
	        frequency: Oscillator frequency (cycles per unit of ``t``).
	        waveform_type: One of 'sine', 'sawtooth', 'square', 'triangle',
	            'pulse'.

	    Returns:
	        Array of the waveform values, one per element of ``t``.

	    Raises:
	        ValueError: If ``waveform_type`` is not one of the supported names.
	    """
	    # The triangle is computed directly from the cycle count, so it is
	    # handled before the phase-driven oscillators below.
	    if waveform_type == 'triangle':
	        cycles = t * frequency
	        return 2 * np.abs(2 * (cycles - np.floor(cycles + 0.5))) - 1

	    # Every remaining shape is a function of the phase angle in radians.
	    oscillators = {
	        'sine': np.sin,
	        'sawtooth': sawtooth,
	        'square': square,
	        'pulse': lambda phase: np.sign(np.sin(phase)),
	    }
	    if waveform_type not in oscillators:
	        raise ValueError("Unsupported waveform type")
	    return oscillators[waveform_type](2 * np.pi * frequency * t)


	def generate_layered_kick(frequency=50, attack=0.01, decay=0.4, sustain_level=0.3, release=0.2, distortion='soft', reverb_amount=0.2, bitcrusher_depth=8, waveform_type='sine', filter_type='lowpass'):
	    """Synthesize a one-shot kick drum as stereo 32-bit integer PCM.

	    Signal chain: oscillator mixed with a little Gaussian noise -> ADSR
	    amplitude envelope -> distortion -> FFT brick-wall filter -> scaling to
	    the int32 range -> duplication into two identical channels.

	    Args:
	        frequency: Oscillator frequency in Hz.
	        attack: Attack time in seconds (ramp from 0 to 1).
	        decay: Decay time in seconds (ramp from 1 to ``sustain_level``).
	        sustain_level: Envelope level held between decay and release.
	        release: Release time in seconds (ramp from ``sustain_level`` to 0).
	        distortion: 'soft' (tanh), 'hard' (2x gain, then clip to [-1, 1]) or
	            'bitcrush' (round to multiples of ``1 / 2 ** bitcrusher_depth``).
	            Any other value falls back to 'soft'.
	        reverb_amount: Accepted for interface compatibility; the synthesis
	            does not use it.
	        bitcrusher_depth: Exponent of the quantization grid for 'bitcrush'.
	        waveform_type: Oscillator shape, forwarded to ``generate_waveform``.
	        filter_type: 'lowpass', 'highpass' or 'bandpass'; any other value
	            skips the filter stage.

	    Returns:
	        ``numpy.ndarray`` of dtype int32 with shape
	        ``(int(SAMPLE_RATE * DURATION), 2)``.
	    """
	    num_frames = int(SAMPLE_RATE * DURATION)
	    t = np.linspace(0, DURATION, num_frames, endpoint=False)

	    # Base tone with a small amount of noise mixed in for texture.
	    waveform = generate_waveform(t, frequency, waveform_type)
	    noise = np.random.normal(0, 0.05, num_frames)
	    waveform = 0.9 * waveform + 0.1 * noise

	    # ADSR envelope. The sustain phase fills whatever time is left; it is
	    # clamped at zero so an attack + decay + release longer than DURATION no
	    # longer makes np.ones() raise on a negative length.
	    attack_samples = int(attack * SAMPLE_RATE)
	    decay_samples = int(decay * SAMPLE_RATE)
	    release_samples = int(release * SAMPLE_RATE)
	    sustain_samples = max(num_frames - attack_samples - decay_samples - release_samples, 0)
	    envelope = np.concatenate([
	        np.linspace(0, 1, attack_samples),
	        np.linspace(1, sustain_level, decay_samples),
	        np.full(sustain_samples, sustain_level),
	        np.linspace(sustain_level, 0, release_samples),
	    ])
	    # An over-long envelope is cut to the sample length (the tail of the
	    # release is dropped) so it can be multiplied with the waveform.
	    waveform = waveform * envelope[:num_frames]

	    # Distortion. Every branch leaves the signal on the normalized [-1, 1]
	    # scale; conversion to integer PCM happens once, at the end.
	    if distortion == 'hard':
	        waveform = np.clip(waveform * 2, -1, 1)
	    elif distortion == 'bitcrush':
	        levels = 2 ** bitcrusher_depth
	        waveform = np.round(waveform * levels) / levels
	    else:
	        # 'soft' and any unrecognized value
	        waveform = np.tanh(waveform)

	    # Brick-wall filter in the frequency domain. The real-input FFT exposes
	    # only non-negative frequencies, so each mask acts on a bin and its
	    # mirror image together. Thresholds are in cycles per sample
	    # (0.05 is about 2.2 kHz at a 44.1 kHz sample rate).
	    if filter_type in ('lowpass', 'highpass', 'bandpass'):
	        spectrum = np.fft.rfft(waveform)
	        freqs = np.fft.rfftfreq(num_frames)
	        if filter_type == 'lowpass':
	            keep = freqs <= 0.05
	        elif filter_type == 'highpass':
	            keep = freqs >= 0.05
	        else:
	            keep = (freqs > 0.01) & (freqs < 0.1)
	        spectrum[~keep] = 0
	        waveform = np.fft.irfft(spectrum, n=num_frames)

	    # Scale to the int32 range for every distortion type. Casting the
	    # normalized float signal directly would truncate almost every value
	    # to 0. Clipping first keeps filter overshoot from wrapping around.
	    max_val = np.iinfo(np.int32).max
	    pcm = np.clip(waveform, -1.0, 1.0) * max_val

	    # Duplicate the mono signal into two identical channels.
	    stereo_waveform = np.stack([pcm, pcm], axis=-1)
	    return stereo_waveform.astype(np.int32)


	def apply_effects(filename):
	    """Post-process the WAV file at ``filename`` in place with pydub.

	    Steps, in order: normalize, apply a random gain to each channel, overlay
	    a boosted low-passed copy and an attenuated high-passed copy, fade in
	    and out, then export back to the same path.

	    Args:
	        filename: Path of the WAV file to read and overwrite.
	    """
	    segment = effects.normalize(AudioSegment.from_wav(filename))

	    # Independent random gain for the left and right channel, each drawn
	    # from [-3, 3] (left is drawn first).
	    left_gain = random.uniform(-3, 3)
	    right_gain = random.uniform(-3, 3)
	    segment = segment.apply_gain_stereo(left_gain, right_gain)

	    # Both filtered copies are derived from the gain-adjusted audio before
	    # either one is overlaid onto it.
	    low_layer = segment.low_pass_filter(100).apply_gain(3)
	    high_layer = segment.high_pass_filter(500).apply_gain(-2)
	    segment = segment.overlay(low_layer)
	    segment = segment.overlay(high_layer)

	    # Fade both ends of the sample.
	    segment = segment.fade_in(50)
	    segment = segment.fade_out(50)

	    # Overwrite the input file with the processed audio.
	    segment.export(filename, format="wav", bitrate="32k")


	def save_sample(sample, filename):
	    """Write ``sample`` to ``filename`` as WAV, then re-export it via pydub.

	    Args:
	        sample: Audio data handed to ``scipy.io.wavfile.write``.
	        filename: Destination path; written once by scipy and then
	            overwritten by pydub's export of the same audio.
	    """
	    write(filename, SAMPLE_RATE, sample)
	    # Round-trip the file through pydub so the file on disk is the one
	    # pydub's WAV exporter produces.
	    AudioSegment.from_wav(filename).export(filename, format="wav", bitrate="32k")


	def calculate_similarity(filename1, filename2):
	    """Return the Euclidean distance between the MFCCs of two audio files.

	    Both files are loaded with librosa at ``SAMPLE_RATE``, so their MFCC
	    matrices are computed at the same rate. A smaller result means the two
	    files are more alike; 0 means the compared MFCC frames are identical.

	    Args:
	        filename1: Path of the first audio file.
	        filename2: Path of the second audio file.

	    Returns:
	        The norm of the element-wise MFCC difference, taken over the frames
	        that both files have.
	    """
	    y1, sr1 = librosa.load(filename1, sr=SAMPLE_RATE)
	    y2, sr2 = librosa.load(filename2, sr=SAMPLE_RATE)
	    mfcc1 = mfcc(y=y1, sr=sr1)
	    mfcc2 = mfcc(y=y2, sr=sr2)
	    # Files of different duration yield a different number of MFCC frames
	    # (last axis); subtracting them directly would raise a shape error.
	    # Compare only the leading frames both have. Equal-length inputs are
	    # unaffected.
	    frames = min(mfcc1.shape[-1], mfcc2.shape[-1])
	    return np.linalg.norm(mfcc1[..., :frames] - mfcc2[..., :frames])


	def generate_samples(num_samples):
	    """Render randomized kicks until enough unique files have been kept.

	    Makes at most ``num_samples`` attempts. Each attempt draws random
	    synthesis parameters, renders the kick into ``OUTPUT_DIR``,
	    post-processes the file and hashes it with MD5. A file whose digest is
	    not yet in ``unique_hashes`` is moved into ``FINAL_OUTPUT_DIR``. The loop
	    stops early once ``FINAL_NUM_SAMPLES`` files have been kept. Afterwards
	    every file in ``FINAL_OUTPUT_DIR`` is copied into ``EXTENDED_OUTPUT_DIR``.

	    Args:
	        num_samples: Maximum number of render attempts.
	    """
	    generated_count = 0
	    # The work runs serially; the thread pool that used to wrap this loop
	    # was never given any tasks, so it has been dropped.
	    for i in range(num_samples):
	        # Randomize the synthesis parameters. The draw order is unchanged
	        # so runs seeded through the `random` module pick the same values.
	        frequency = random.uniform(40, 80)  # Hz
	        attack = random.uniform(0.005, 0.02)  # seconds
	        decay = random.uniform(0.1, 0.5)  # seconds
	        sustain_level = random.uniform(0.2, 0.5)  # 0 to 1
	        release = random.uniform(0.05, 0.3)  # seconds
	        distortion = random.choice(DISTORTION_TYPES)
	        reverb_amount = random.uniform(0.1, 0.5)
	        bitcrusher_depth = random.randint(6, 12)
	        waveform_type = random.choice(WAVEFORM_TYPES)
	        filter_type = random.choice(FILTER_TYPES)

	        sample = generate_layered_kick(
	            frequency=frequency,
	            attack=attack,
	            decay=decay,
	            sustain_level=sustain_level,
	            release=release,
	            distortion=distortion,
	            reverb_amount=reverb_amount,
	            bitcrusher_depth=bitcrusher_depth,
	            waveform_type=waveform_type,
	            filter_type=filter_type,
	        )

	        # Stage the render in OUTPUT_DIR, then post-process it in place.
	        filename = os.path.join(OUTPUT_DIR, f"kick_{i + 1}.wav")
	        save_sample(sample, filename)
	        apply_effects(filename)

	        # Digest of the finished file, used to detect byte-identical renders.
	        with open(filename, 'rb') as f:
	            file_hash = hashlib.md5(f.read()).hexdigest()

	        # Keep only renders whose digest has not been seen before.
	        # NOTE: a render rejected here stays behind in OUTPUT_DIR.
	        if file_hash not in unique_hashes:
	            generated_count += 1
	            final_filename = os.path.join(FINAL_OUTPUT_DIR, f"kick_filtered_{generated_count}.wav")
	            # os.replace overwrites a file left over from an earlier run on
	            # every platform; os.rename raises on Windows in that case.
	            os.replace(filename, final_filename)
	            # Record where the file now lives, not the vacated staging path.
	            unique_hashes[file_hash] = final_filename
	            print(f"Generated and kept: {final_filename}")

	        # Stop once enough unique samples have been collected.
	        if generated_count >= FINAL_NUM_SAMPLES:
	            break

	    # Mirror the kept samples into the extended directory.
	    for entry in os.listdir(FINAL_OUTPUT_DIR):
	        shutil.copy(os.path.join(FINAL_OUTPUT_DIR, entry), EXTENDED_OUTPUT_DIR)

	    print(f"Successfully generated {generated_count} unique kick drum samples in '{FINAL_OUTPUT_DIR}' directory.")
	    print(f"Extended samples are available in '{EXTENDED_OUTPUT_DIR}' for further processing.")


	if __name__ == "__main__":
	    # Script entry point: run the full generation pass, then report completion.
	    generate_samples(NUM_SAMPLES)
	    print("Finished processing kick drum samples.")