daitomanabe · June 1, 2023 12:16
diff --git a/wav_to_csv.py b/wav_to_csv.py
 import numpy as np
 import soundfile as sf
 import csv
 import os
 import sys
 import glob

 # Convert every WAV/AIFF file in a directory into a CSV file holding one row
 # of low/mid/high/total band levels per analysis frame.
 #
 # Usage: python wav_to_csv.py <directory>

 # File extensions (compared in lower case) that are treated as audio input.
 AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

 # Number of analysis frames, and therefore CSV rows, per second of audio.
 FRAMES_PER_SECOND = 30

 # Band edges in Hz. The low and mid bands exclude their upper edge so that a
 # bin lying exactly on 300 Hz or 3000 Hz is counted in one band only; the
 # high band includes its upper edge.
 LOW_RANGE = (20, 300)
 MID_RANGE = (300, 3000)
 HIGH_RANGE = (3000, 20000)

 # Column order of the generated CSV files.
 FIELDNAMES = ['low', 'mid', 'high', 'total']


 def is_audio_file(path):
     """Return True if *path* ends in one of AUDIO_EXTENSIONS (any case)."""
     return os.path.splitext(path)[1].lower() in AUDIO_EXTENSIONS


 def frame_length(samplerate):
     """Return the number of samples per analysis frame (never less than 1)."""
     # A sample rate below FRAMES_PER_SECOND floors to 0, and range() refuses
     # a zero step, so clamp to one sample per frame.
     return max(1, samplerate // FRAMES_PER_SECOND)


 def band_volumes(frame, samplerate):
     """Return ``(low, mid, high, total)`` for one mono frame of samples.

     Each band value is the sum of the FFT magnitudes inside that band divided
     by the largest of the three sums, so the loudest band is exactly 1.0.
     ``total`` is the mean of the three normalized values. When no band has a
     positive sum (e.g. a silent frame) the sums are returned unscaled and
     ``total`` is 0.0.
     """
     magnitudes = np.abs(np.fft.rfft(frame))
     # Bin k sits at k * samplerate / n Hz. Multiplying before dividing keeps
     # bins that fall exactly on a band edge exact; going through the rounded
     # reciprocal 1/samplerate could push them to either side of the edge.
     freqs = np.arange(len(magnitudes)) * samplerate / len(frame)

     low = magnitudes[(freqs >= LOW_RANGE[0]) & (freqs < LOW_RANGE[1])].sum()
     mid = magnitudes[(freqs >= MID_RANGE[0]) & (freqs < MID_RANGE[1])].sum()
     high = magnitudes[(freqs >= HIGH_RANGE[0]) & (freqs <= HIGH_RANGE[1])].sum()

     total = 0.0
     peak = max(low, mid, high)
     if peak > 0:
         low, mid, high = low / peak, mid / peak, high / peak
         # Three values of at most 1.0 each, hence the division by 3.
         total = (low + mid + high) / 3
     return low, mid, high, total


 def convert_file(audio_filename):
     """Analyse *audio_filename* and write the result next to it as ``.csv``.

     The CSV has the same path as the audio file with the extension replaced,
     a header row, and one row per frame of ``frame_length(samplerate)``
     samples. The last frame may be shorter than the others.
     """
     data, samplerate = sf.read(audio_filename)

     # Average all channels down to a single mono signal.
     if data.ndim > 1:
         data = np.mean(data, axis=1)

     step = frame_length(samplerate)
     csv_filename = os.path.splitext(audio_filename)[0] + '.csv'

     with open(csv_filename, 'w', newline='') as csvfile:
         writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
         writer.writeheader()
         for start in range(0, len(data), step):
             low, mid, high, total = band_volumes(data[start:start + step], samplerate)
             writer.writerow({'low': low, 'mid': mid, 'high': high, 'total': total})


 def main():
     """Convert every supported audio file in the directory given on the command line."""
     if len(sys.argv) < 2:
         sys.exit(f'usage: {sys.argv[0]} <directory>')
     directory = sys.argv[1]

     # Escape the directory so characters such as '[' in its name are taken
     # literally instead of being interpreted as glob patterns.
     for audio_filename in glob.glob(os.path.join(glob.escape(directory), '*')):
         if not is_audio_file(audio_filename):
             continue
         convert_file(audio_filename)
         # Print a message to show progress
         print(f'Finished processing {audio_filename}')


 if __name__ == '__main__':
     main()
	import numpy as np
	import soundfile as sf
	import csv
	import os
	import sys
	import glob

	# Convert every WAV/AIFF file in a directory into a CSV file holding one row
	# of low/mid/high/total band levels per analysis frame.
	#
	# Usage: python wav_to_csv.py <directory>

	# File extensions (compared in lower case) that are treated as audio input.
	AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

	# Number of analysis frames, and therefore CSV rows, per second of audio.
	FRAMES_PER_SECOND = 30

	# Band edges in Hz. The low and mid bands exclude their upper edge so that a
	# bin lying exactly on 300 Hz or 3000 Hz is counted in one band only; the
	# high band includes its upper edge.
	LOW_RANGE = (20, 300)
	MID_RANGE = (300, 3000)
	HIGH_RANGE = (3000, 20000)

	# Column order of the generated CSV files.
	FIELDNAMES = ['low', 'mid', 'high', 'total']


	def is_audio_file(path):
	    """Return True if *path* ends in one of AUDIO_EXTENSIONS (any case)."""
	    return os.path.splitext(path)[1].lower() in AUDIO_EXTENSIONS


	def frame_length(samplerate):
	    """Return the number of samples per analysis frame (never less than 1)."""
	    # A sample rate below FRAMES_PER_SECOND floors to 0, and range() refuses
	    # a zero step, so clamp to one sample per frame.
	    return max(1, samplerate // FRAMES_PER_SECOND)


	def band_volumes(frame, samplerate):
	    """Return ``(low, mid, high, total)`` for one mono frame of samples.

	    Each band value is the sum of the FFT magnitudes inside that band divided
	    by the largest of the three sums, so the loudest band is exactly 1.0.
	    ``total`` is the mean of the three normalized values. When no band has a
	    positive sum (e.g. a silent frame) the sums are returned unscaled and
	    ``total`` is 0.0.
	    """
	    magnitudes = np.abs(np.fft.rfft(frame))
	    # Bin k sits at k * samplerate / n Hz. Multiplying before dividing keeps
	    # bins that fall exactly on a band edge exact; going through the rounded
	    # reciprocal 1/samplerate could push them to either side of the edge.
	    freqs = np.arange(len(magnitudes)) * samplerate / len(frame)

	    low = magnitudes[(freqs >= LOW_RANGE[0]) & (freqs < LOW_RANGE[1])].sum()
	    mid = magnitudes[(freqs >= MID_RANGE[0]) & (freqs < MID_RANGE[1])].sum()
	    high = magnitudes[(freqs >= HIGH_RANGE[0]) & (freqs <= HIGH_RANGE[1])].sum()

	    total = 0.0
	    peak = max(low, mid, high)
	    if peak > 0:
	        low, mid, high = low / peak, mid / peak, high / peak
	        # Three values of at most 1.0 each, hence the division by 3.
	        total = (low + mid + high) / 3
	    return low, mid, high, total


	def convert_file(audio_filename):
	    """Analyse *audio_filename* and write the result next to it as ``.csv``.

	    The CSV has the same path as the audio file with the extension replaced,
	    a header row, and one row per frame of ``frame_length(samplerate)``
	    samples. The last frame may be shorter than the others.
	    """
	    data, samplerate = sf.read(audio_filename)

	    # Average all channels down to a single mono signal.
	    if data.ndim > 1:
	        data = np.mean(data, axis=1)

	    step = frame_length(samplerate)
	    csv_filename = os.path.splitext(audio_filename)[0] + '.csv'

	    with open(csv_filename, 'w', newline='') as csvfile:
	        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
	        writer.writeheader()
	        for start in range(0, len(data), step):
	            low, mid, high, total = band_volumes(data[start:start + step], samplerate)
	            writer.writerow({'low': low, 'mid': mid, 'high': high, 'total': total})


	def main():
	    """Convert every supported audio file in the directory given on the command line."""
	    if len(sys.argv) < 2:
	        sys.exit(f'usage: {sys.argv[0]} <directory>')
	    directory = sys.argv[1]

	    # Escape the directory so characters such as '[' in its name are taken
	    # literally instead of being interpreted as glob patterns.
	    for audio_filename in glob.glob(os.path.join(glob.escape(directory), '*')):
	        if not is_audio_file(audio_filename):
	            continue
	        convert_file(audio_filename)
	        # Print a message to show progress
	        print(f'Finished processing {audio_filename}')


	if __name__ == '__main__':
	    main()