Skip to content

Instantly share code, notes, and snippets.

@daitomanabe
Created June 1, 2023 12:16
Show Gist options
  • Save daitomanabe/39526e2466e8a5747a2aa900a8ceaccd to your computer and use it in GitHub Desktop.
Save daitomanabe/39526e2466e8a5747a2aa900a8ceaccd to your computer and use it in GitHub Desktop.
wav_to_csv (low, mid, high, total)
import numpy as np
import soundfile as sf
import csv
import os
import sys
import glob
# Number of analysis frames (CSV rows) produced per second of audio.
FRAMES_PER_SECOND = 30

# Lower-cased file extensions accepted as audio input (WAV and AIFF).
AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

# Frequency bands in Hz. The low and mid bands are half-open [start, stop) so
# that a bin lying exactly on a shared edge (300 Hz, 3000 Hz) is counted in
# one band only; the high band keeps its inclusive upper edge.
LOW_BAND = (20, 300)
MID_BAND = (300, 3000)
HIGH_BAND = (3000, 20000)


def is_audio_file(path):
    """Return True if *path* ends with a WAV/AIFF extension (case-insensitive)."""
    return os.path.splitext(path)[1].lower() in AUDIO_EXTENSIONS


def band_levels(frame, samplerate):
    """Return ``(low, mid, high, total)`` levels for one mono frame.

    The magnitude spectrum of *frame* is summed over the low, mid and high
    bands. The three sums are then divided by the largest of them, so the
    loudest band is 1.0 and the others are relative to it. ``total`` is the
    mean of the three normalized values, which means it lies in [1/3, 1] for
    any non-silent frame. For a silent (or empty) frame every value is zero.

    Args:
        frame: 1-D array of samples.
        samplerate: Sample rate of *frame* in Hz.
    """
    if len(frame) == 0:
        return 0.0, 0.0, 0.0, 0

    magnitudes = np.abs(np.fft.rfft(frame))
    # Bin k sits at k * samplerate / n Hz. Computing it in this order (instead
    # of via a rounded 1/samplerate spacing) keeps bins that fall exactly on a
    # band edge exactly equal to that edge.
    freqs = np.arange(len(magnitudes)) * samplerate / len(frame)

    low = magnitudes[(freqs >= LOW_BAND[0]) & (freqs < LOW_BAND[1])].sum()
    mid = magnitudes[(freqs >= MID_BAND[0]) & (freqs < MID_BAND[1])].sum()
    high = magnitudes[(freqs >= HIGH_BAND[0]) & (freqs <= HIGH_BAND[1])].sum()

    # Normalize relative to the loudest band (not the overall amplitude).
    peak = max(low, mid, high)
    # Stays the integer 0 for silent frames so those rows are written
    # exactly as before.
    total = 0
    if peak > 0:
        low /= peak
        mid /= peak
        high /= peak
        # Divide by 3, the largest value the sum of three levels can reach.
        total = (low + mid + high) / 3
    return low, mid, high, total


def analyze_file(audio_filename):
    """Analyze one audio file and write the per-frame levels next to it.

    The output CSV has the same path as the audio file with the extension
    replaced by ``.csv`` and the columns ``low, mid, high, total``.

    Returns:
        The path of the CSV file that was written.
    """
    data, samplerate = sf.read(audio_filename)

    # Multi-channel audio is mixed down to mono by averaging the channels.
    if data.ndim > 1:
        data = np.mean(data, axis=1)

    # Guard against a zero step for (unusual) sample rates below the frame
    # rate, which would make range() raise.
    samples_per_frame = max(1, samplerate // FRAMES_PER_SECOND)

    csv_filename = os.path.splitext(audio_filename)[0] + '.csv'
    with open(csv_filename, 'w', newline='') as csvfile:
        fieldnames = ['low', 'mid', 'high', 'total']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # The last frame may be shorter than samples_per_frame.
        for start in range(0, len(data), samples_per_frame):
            frame = data[start:start + samples_per_frame]
            low, mid, high, total = band_levels(frame, samplerate)
            writer.writerow({'low': low, 'mid': mid, 'high': high, 'total': total})
    return csv_filename


def main(argv=None):
    """Process every WAV/AIFF file in the directory given on the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument (the directory) is used.

    Returns:
        Process exit status: 0 on success, 2 when the directory is missing.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print(f'usage: {os.path.basename(sys.argv[0])} DIRECTORY', file=sys.stderr)
        return 2
    directory = args[0]

    # Sorted so files are processed in a reproducible order.
    for audio_filename in sorted(glob.glob(os.path.join(directory, '*'))):
        if not is_audio_file(audio_filename):
            continue
        analyze_file(audio_filename)
        # Print a message to show progress
        print(f'Finished processing {audio_filename}')
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment