Created
June 1, 2023 12:16
-
-
Save daitomanabe/39526e2466e8a5747a2aa900a8ceaccd to your computer and use it in GitHub Desktop.
wav_to_csv (low, mid, high, total)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import glob
import os
import sys

import numpy as np
import soundfile as sf
# Convert every WAV/AIFF file in a directory into a CSV with one row per
# analysis frame: normalized low / mid / high band levels and their average.
#
# Usage: python <this script> <directory>

# File extensions (compared lower-case) that are treated as audio input.
# Both common AIFF spellings are accepted.
AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

# Number of analysis frames (CSV rows) per second of audio.
FRAMES_PER_SECOND = 30

# Frequency bands in Hz. Adjacent bands share an edge; the shared edge
# belongs to the upper band so that no FFT bin is counted twice.
LOW_RANGE = (20, 300)
MID_RANGE = (300, 3000)
HIGH_RANGE = (3000, 20000)


def band_levels(frame, samplerate):
    """Return ``(low, mid, high, total)`` levels for one frame of mono audio.

    The magnitude spectrum of ``frame`` is summed over three bands:
    low ``[20, 300)`` Hz, mid ``[300, 3000)`` Hz and high ``[3000, 20000]`` Hz.
    The three sums are then divided by the largest of them, so the loudest
    band is exactly 1.0 and the others are relative to it. ``total`` is the
    mean of the three normalized levels, which keeps it in the range 0..1.

    Args:
        frame: 1-D sequence of samples.
        samplerate: Sampling rate of ``frame`` in Hz.

    Returns:
        A tuple of four Python floats. A frame with no samples yields all
        zeros. When the largest band sum is not positive (e.g. silence) the
        band sums are returned un-normalized and ``total`` is 0.0.
    """
    if len(frame) == 0:
        # np.fft.rfft rejects zero-length input.
        return 0.0, 0.0, 0.0, 0.0

    magnitudes = np.abs(np.fft.rfft(frame))
    freqs = np.fft.rfftfreq(len(frame), 1.0 / samplerate)

    # Half-open intervals for low and mid: a bin sitting exactly on 300 Hz or
    # 3000 Hz is attributed to the upper band only.
    low = magnitudes[(freqs >= LOW_RANGE[0]) & (freqs < LOW_RANGE[1])].sum()
    mid = magnitudes[(freqs >= MID_RANGE[0]) & (freqs < MID_RANGE[1])].sum()
    high = magnitudes[(freqs >= HIGH_RANGE[0]) & (freqs <= HIGH_RANGE[1])].sum()

    # Normalize by the loudest band (not by the overall amplitude).
    peak = max(low, mid, high)
    if not peak > 0:
        return float(low), float(mid), float(high), 0.0

    low = float(low / peak)
    mid = float(mid / peak)
    high = float(high / peak)
    # Each level is at most 1, so dividing the sum by 3 keeps total in 0..1.
    return low, mid, high, (low + mid + high) / 3


def convert_file(audio_filename):
    """Analyze one audio file and write its band levels next to it as CSV.

    The CSV has the same path as the audio file with the extension replaced
    by ``.csv`` and the columns ``low``, ``mid``, ``high``, ``total``.

    Args:
        audio_filename: Path of the audio file to read with soundfile.

    Returns:
        The path of the CSV file that was written.
    """
    data, samplerate = sf.read(audio_filename)

    # Multi-channel audio is averaged down to mono.
    # NOTE(review): assumes a 2-D result is laid out (frames, channels) -
    # confirm against the soundfile documentation.
    if data.ndim > 1:
        data = data.mean(axis=1)

    # Guard against a zero step for (unusual) sample rates below the frame rate.
    samples_per_frame = max(1, samplerate // FRAMES_PER_SECOND)

    csv_filename = os.path.splitext(audio_filename)[0] + '.csv'
    with open(csv_filename, 'w', newline='') as csvfile:
        fieldnames = ['low', 'mid', 'high', 'total']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # The final frame may be shorter than samples_per_frame.
        for start in range(0, len(data), samples_per_frame):
            frame = data[start:start + samples_per_frame]
            low, mid, high, total = band_levels(frame, samplerate)
            writer.writerow({'low': low, 'mid': mid, 'high': high, 'total': total})

    return csv_filename


def main(argv=None):
    """Process every WAV/AIFF file directly inside the given directory.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the directory to scan.

    Returns:
        Process exit code: 0 on success, 2 when no directory was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print(f'Usage: {os.path.basename(sys.argv[0])} <directory>', file=sys.stderr)
        return 2

    directory = args[0]
    for audio_filename in glob.glob(os.path.join(directory, '*')):
        # Skip files that are not WAV or AIFF
        ext = os.path.splitext(audio_filename)[1].lower()
        if ext not in AUDIO_EXTENSIONS:
            continue

        convert_file(audio_filename)

        # Print a message to show progress
        print(f'Finished processing {audio_filename}')

    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment