Skip to content

Instantly share code, notes, and snippets.

@daitomanabe
Created June 1, 2023 12:16

Revisions

  1. daitomanabe created this gist Jun 1, 2023.
    69 changes: 69 additions & 0 deletions wav_to_csv.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@
    import numpy as np
    import soundfile as sf
    import csv
    import os
    import sys
    import glob

    # Convert every WAV/AIFF file in a directory to a per-frame band-level CSV.
    #
    # Usage: python wav_to_csv.py <directory>
    #
    # For each audio file a CSV with the same base name is written next to it.
    # Each row describes one 1/30 s frame: the summed FFT magnitude of the low,
    # mid and high bands (scaled so the loudest band of that frame is 1.0) and
    # the mean of the three scaled values.

    # Extensions (compared lower-case) that are treated as audio input.
    AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

    # Number of analysis frames (CSV rows) per second of audio.
    FRAMES_PER_SECOND = 30

    # Band edges in Hz. LOW and MID are half-open [lo, hi) so that a bin lying
    # exactly on a shared edge (300 Hz, 3000 Hz) is counted in one band only;
    # HIGH is closed [lo, hi] so its upper edge is still included.
    LOW_RANGE = (20, 300)
    MID_RANGE = (300, 3000)
    HIGH_RANGE = (3000, 20000)

    # Column order of the generated CSV files.
    FIELDNAMES = ['low', 'mid', 'high', 'total']


    def is_audio_file(path):
        """Return True if *path* has a WAV or AIFF extension (case-insensitive)."""
        return os.path.splitext(path)[1].lower() in AUDIO_EXTENSIONS


    def samples_per_frame(samplerate):
        """Return the frame length in samples for *samplerate*, never below 1.

        A sample rate under FRAMES_PER_SECOND floor-divides to 0, which is not
        a valid ``range`` step, so the result is clamped to 1.
        """
        return max(1, int(samplerate) // FRAMES_PER_SECOND)


    def analyze_frame(frame, samplerate):
        """Return the CSV row for one frame of mono samples.

        Args:
            frame: 1-D array of samples (the last frame of a file may be
                shorter than the others).
            samplerate: Sample rate of *frame* in Hz.

        Returns:
            A dict with the keys ``low``, ``mid``, ``high`` and ``total``.
            The band values are summed FFT magnitudes divided by the largest
            of the three, so the loudest band is 1.0; ``total`` is their
            mean. All four values are 0.0 when the three bands carry no energy.
        """
        magnitudes = np.abs(np.fft.rfft(frame))
        freqs = np.fft.rfftfreq(len(frame), 1.0 / samplerate)

        low = float(magnitudes[(freqs >= LOW_RANGE[0]) & (freqs < LOW_RANGE[1])].sum())
        mid = float(magnitudes[(freqs >= MID_RANGE[0]) & (freqs < MID_RANGE[1])].sum())
        high = float(magnitudes[(freqs >= HIGH_RANGE[0]) & (freqs <= HIGH_RANGE[1])].sum())

        # Scale by the loudest band; skip the division on silent frames to
        # avoid dividing by zero.
        peak = max(low, mid, high)
        total = 0.0
        if peak > 0:
            low /= peak
            mid /= peak
            high /= peak
            # Three values of at most 1.0 each, so dividing by 3 keeps
            # the total within [0, 1].
            total = (low + mid + high) / 3

        return {'low': low, 'mid': mid, 'high': high, 'total': total}


    def convert_file(audio_filename):
        """Analyze *audio_filename* and write the result to a sibling ``.csv``.

        Multi-channel audio is averaged down to mono before analysis.

        Returns:
            The path of the CSV file that was written.
        """
        data, samplerate = sf.read(audio_filename)

        # sf.read returns a 2-D (frames, channels) array for multi-channel files.
        if data.ndim > 1:
            data = data.mean(axis=1)

        step = samples_per_frame(samplerate)
        csv_filename = os.path.splitext(audio_filename)[0] + '.csv'

        with open(csv_filename, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
            writer.writeheader()
            for start in range(0, len(data), step):
                writer.writerow(analyze_frame(data[start:start + step], samplerate))

        return csv_filename


    def main(argv):
        """Convert all audio files in the directory named by ``argv[1]``.

        Returns:
            The process exit code: 0 on success, 2 on a missing argument,
            1 when the argument is not a directory.
        """
        if len(argv) < 2:
            print(f'Usage: {argv[0] if argv else "wav_to_csv.py"} <directory>', file=sys.stderr)
            return 2

        directory = argv[1]
        if not os.path.isdir(directory):
            print(f'Not a directory: {directory}', file=sys.stderr)
            return 1

        # Sorted so the processing order is the same on every run.
        for audio_filename in sorted(glob.glob(os.path.join(directory, '*'))):
            if not is_audio_file(audio_filename):
                continue
            convert_file(audio_filename)
            print(f'Finished processing {audio_filename}')

        return 0


    if __name__ == '__main__':
        sys.exit(main(sys.argv))