Created
June 1, 2023 12:16
Revisions
-
daitomanabe created this gist
Jun 1, 2023. There are no files selected for viewing
# NOTE: the indented comment lines below preserve gist-viewer boilerplate that was
# captured together with the script; they are not part of the program.
#   This file contains bidirectional Unicode text that may be interpreted or
#   compiled differently than what appears below. To review, open the file in an
#   editor that reveals hidden Unicode characters. Learn more about bidirectional
#   Unicode characters
#   Original file line number / Diff line number / Diff line change
#   @@ -0,0 +1,69 @@
"""Export per-frame low/mid/high band levels of audio files to CSV.

Usage: pass a directory as the only command-line argument.  For every WAV or
AIFF file directly inside it, a CSV with the same base name is written next to
the audio file.  Each row describes one 1/30 s frame with the columns
``low``, ``mid``, ``high`` and ``total``:

* ``low`` / ``mid`` / ``high`` -- sum of the real-FFT magnitudes inside the
  band, divided by the largest of the three sums (so the loudest band is 1.0).
* ``total`` -- mean of the three normalized band levels.

NOTE: two inputs sharing a base name (e.g. ``a.wav`` and ``a.aif``) map to the
same ``a.csv``; the one processed last wins.
"""
import csv
import glob
import os
import sys
from typing import Dict, Iterator, List, Optional

import numpy as np

# Analysis frames per second of audio (one CSV row per frame).
FRAMES_PER_SECOND = 30

# File extensions (lower-case) that are treated as audio input.
AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff')

# Band edges in Hz.  Bands are half-open, [lower, upper), so a bin that sits
# exactly on 300 Hz or 3000 Hz belongs to one band only; the top edge of the
# high band stays inclusive.
LOW_EDGE_HZ = 20
MID_EDGE_HZ = 300
HIGH_EDGE_HZ = 3000
TOP_EDGE_HZ = 20000

# CSV column order.
FIELDNAMES = ['low', 'mid', 'high', 'total']


def is_audio_file(path: str) -> bool:
    """Return True if *path* has a WAV or AIFF extension (case-insensitive)."""
    return os.path.splitext(path)[1].lower() in AUDIO_EXTENSIONS


def to_mono(data: np.ndarray) -> np.ndarray:
    """Average the channels of a (samples, channels) array into one channel.

    One-dimensional input is returned unchanged.
    """
    data = np.asarray(data)
    if data.ndim > 1:
        return data.mean(axis=1)
    return data


def frame_band_levels(frame: np.ndarray, samplerate: float) -> Dict[str, float]:
    """Return the normalized low/mid/high/total levels of one frame.

    Args:
        frame: Non-empty one-dimensional block of samples.
        samplerate: Sampling rate of *frame* in Hz.

    Returns:
        A dict with the keys ``low``, ``mid``, ``high`` and ``total``.  All
        four values are 0.0 when no band contains any energy.
    """
    frame = np.asarray(frame)
    n = len(frame)
    magnitudes = np.abs(np.fft.rfft(frame))
    # Bin k is at k * samplerate / n.  Computing it in this order (instead of
    # going through the reciprocal 1/samplerate) keeps bins that fall exactly
    # on a band edge exactly on that edge.
    freqs = np.arange(n // 2 + 1) * samplerate / n

    low = magnitudes[(freqs >= LOW_EDGE_HZ) & (freqs < MID_EDGE_HZ)].sum()
    mid = magnitudes[(freqs >= MID_EDGE_HZ) & (freqs < HIGH_EDGE_HZ)].sum()
    high = magnitudes[(freqs >= HIGH_EDGE_HZ) & (freqs <= TOP_EDGE_HZ)].sum()

    # Normalize by the loudest band; a silent frame stays at zero instead of
    # dividing by zero.
    peak = max(low, mid, high)
    total = 0.0
    if peak > 0:
        low /= peak
        mid /= peak
        high /= peak
        # Each normalized level is at most 1, so the mean is also within [0, 1].
        total = (low + mid + high) / 3

    return {
        'low': float(low),
        'mid': float(mid),
        'high': float(high),
        'total': float(total),
    }


def analyze_signal(data: np.ndarray, samplerate: float) -> Iterator[Dict[str, float]]:
    """Yield one ``frame_band_levels`` row per 1/30 s frame of mono *data*.

    The final frame may be shorter than the others.  Sample rates below
    30 Hz fall back to one sample per frame.
    """
    # Guard against a zero step, which range() rejects.
    samples_per_frame = max(1, int(samplerate) // FRAMES_PER_SECOND)
    for start in range(0, len(data), samples_per_frame):
        yield frame_band_levels(data[start:start + samples_per_frame], samplerate)


def process_file(audio_filename: str) -> str:
    """Analyze one audio file and write its CSV; return the CSV path."""
    # Imported lazily so the numeric helpers above stay importable on systems
    # without the soundfile/libsndfile dependency.
    import soundfile as sf

    data, samplerate = sf.read(audio_filename)

    # Output CSV has the same name as the audio file but with .csv extension.
    csv_filename = os.path.splitext(audio_filename)[0] + '.csv'
    with open(csv_filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(analyze_signal(to_mono(data), samplerate))
    return csv_filename


def main(argv: Optional[List[str]] = None) -> int:
    """Process every WAV/AIFF file in the directory named by the first argument.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 when the directory argument is
        missing, 1 when it is not a directory.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(f'usage: {os.path.basename(sys.argv[0])} DIRECTORY', file=sys.stderr)
        return 2

    directory = args[0]
    if not os.path.isdir(directory):
        print(f'error: not a directory: {directory}', file=sys.stderr)
        return 1

    # Escape the directory so characters such as "[" in its name are not
    # interpreted as glob syntax; sort for a reproducible processing order.
    pattern = os.path.join(glob.escape(directory), '*')
    for audio_filename in sorted(glob.glob(pattern)):
        if not is_audio_file(audio_filename):
            continue
        process_file(audio_filename)
        # Print a message to show progress
        print(f'Finished processing {audio_filename}')
    return 0


if __name__ == '__main__':
    sys.exit(main())