Created
February 5, 2024 05:46
-
-
Save Frank-Buss/d807805c533a2687f3ff266f9fffcfd9 to your computer and use it in GitHub Desktop.
LUFS momentary and short analyzer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Audio loudness calculation and normalization. | |
# The LUFS calculations here are all based on: | |
# ITU documentation: https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.1770-4-201510-I!!PDF-E.pdf | |
# EBU documentation: https://tech.ebu.ch/docs/tech/tech3341.pdf | |
# pyloudnorm by csteinmetz1: https://github.com/csteinmetz1/pyloudnorm | |
# loudness.py by BrechtDeMan: https://github.com/BrechtDeMan/loudness.py | |
# Special thanks to these authors! | |
# I just rewrote some codes to enable short-term and momentary loudness calculations and normalizations for more convinent batch processing of audio files. | |
import numpy as np | |
from scipy import signal | |
def amp2db(amp: float): # zero or positive amp value range between 0 and 1. | |
return 20*np.log10(amp) | |
def db2amp(db: float): # zero or negative db value. | |
return np.power(10, db/20) | |
def change_vol(au, db_change): | |
return au*db2amp(db_change) | |
def check_clipping(au): | |
assert np.amax(np.abs(au)) >= 1, 'Clipping has occurred.' | |
def recompare(au, au_re, sr=None): | |
print('reconstruction comparison:') | |
if sr: | |
print(f'difference in length: {round((au_re.shape[0] - au.shape[0])/sr, 4)} seconds') | |
print(f'max error: {round(amp2db(np.amax(np.abs(au_re[:au.shape[0],...] - au))), 4)} db') | |
def print_peak(au): | |
au_abs = np.abs(au) | |
peak_amp = np.amax(au_abs) | |
peak_db = amp2db(peak_amp) | |
print(f'peak_amp = {round(peak_amp, 5)}, peak_db = {round(peak_db, 2)}') | |
def get_peak(au): | |
return np.amax(np.abs(au)) | |
def get_peak_LR(au): | |
peak_LR = np.amax(np.abs(au), axis=0) | |
return peak_LR[0], peak_LR[1] | |
def norm_peak_mid(au, peak_db=-12.0): | |
""" | |
normalize the peak amplitude of the mid channel of a stereo wav file under the normal -3db pan law. | |
input array, output array, no read or write audio files. | |
""" | |
au *= db2amp(peak_db)/np.amax(np.abs(np.average(au, axis=-1))) | |
return au | |
def norm_peak_mono(au, peak_db=-12.0): | |
""" | |
normalize the peak amplitude of a mono audio. | |
input array, output array, no read or write audio files. | |
""" | |
au *= db2amp(peak_db)/np.amax(np.abs(au)) | |
return au | |
class lufs_meter(): | |
# The momentary and integrated LUFS, defined as python class to pre-compute the window and prefilter coefficients. | |
# Input audio need to have shape (nsample,) or (nsample, nchannel). | |
def __init__(self, sr, T=0.4, overlap=0.75, threshold=-70.0, start_du=None): | |
""" | |
Parameters | |
sr: float (Hz). Sample rate for audio. If you want to process different sample rates, you need to set multiple meters. | |
T: float (seconds). Time length of each window. You can change it to 3 to get the short-term lufs (Slufs). | |
overlap: float (fraction). Proportion of overlapping between windows. | |
threshold: float (LUFS or LKFS). If the LUFS is lower than this threshold, the meter will return -inf instead of very big negative numbers for runtime stability. | |
start_du: float (seconds). The start seconds to only analyze. | |
Only works for mono or stereo audio because I just summed all the channels and didn't calculate the different weights in case of a more-than-2-channels audio input. | |
""" | |
# Calculate window. | |
self.sr, self.T, self.overlap, self.threshold, self.du_start = sr, T, overlap, threshold, start_du | |
self.step, self.hop = int(sr*T), int(sr*T*(1-overlap)) | |
self.z_threshold = np.power(10, (threshold+0.691)/10) | |
if start_du is None: | |
self.n_start = None | |
else: | |
self.n_start = int(sr*start_du) | |
# Calculate prefilter (containing 2 IIR filters) coefficients. | |
if sr == 48000: | |
# coefficients in the ITU documentation. | |
self.sos = np.array([[1.53512485958697, -2.69169618940638, 1.19839281085285, \ | |
1.0 , -1.69065929318241, 0.73248077421585], \ | |
[1.0, -2.0, 1.0, 1.0, -1.99004745483398, 0.99007225036621]]) | |
elif sr == 44100: | |
# coefficients calculation by BrechtDeMan. | |
self.sos = np.array([[1.5308412300498355, -2.6509799951536985, 1.1690790799210682, \ | |
1.0, -1.6636551132560204, 0.7125954280732254], \ | |
[1.0, -2.0, 1.0, 1.0, -1.9891696736297957, 0.9891990357870394]]) | |
else: | |
# coefficients calculation by BrechtDeMan. | |
# pre-filter 1 | |
f0 = 1681.9744509555319 | |
G = 3.99984385397 | |
Q = 0.7071752369554193 | |
K = np.tan(np.pi * f0 / sr) | |
Vh = np.power(10.0, G / 20.0) | |
Vb = np.power(Vh, 0.499666774155) | |
a0_1_ = 1.0 + K / Q + K * K | |
b0_1 = (Vh + Vb * K / Q + K * K) / a0_1_ | |
b1_1 = 2.0 * (K * K - Vh) / a0_1_ | |
b2_1 = (Vh - Vb * K / Q + K * K) / a0_1_ | |
a0_1 = 1.0 | |
a1_1 = 2.0 * (K * K - 1.0) / a0_1_ | |
a2_1 = (1.0 - K / Q + K * K) / a0_1_ | |
# pre-filter 2 | |
f0 = 38.13547087613982 | |
Q = 0.5003270373253953 | |
K = np.tan(np.pi * f0 / sr) | |
a0_2 = 1.0 | |
a1_2 = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K) | |
a2_2 = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K) | |
b0_2 = 1.0 | |
b1_2 = -2.0 | |
b2_2 = 1.0 | |
self.sos = np.array([[b0_1, b1_1, b2_1, a0_1, a1_1, a2_1], \ | |
[b0_2, b1_2, b2_2, a0_2, a1_2, a2_2]]) | |
def get_mlufs(self, au): | |
# Get the momentary LUFS array with size n_window. | |
# The audio array will be padded zeros at the end to complete the last window. | |
au = au[:self.n_start,...] | |
q1, q2 = divmod(au.shape[0], self.hop) | |
q3 = self.step - self.hop - q2 | |
if q3 > 0: | |
pad_shape = list(au.shape) | |
pad_shape[0] = q3 | |
au = np.append(au, np.zeros(pad_shape), axis=0) | |
Mlufs = np.empty(0) | |
for i in range(q1): | |
au_f = au[i*self.hop: i*self.hop+self.step,...] | |
au_f = signal.sosfilt(self.sos, au_f, axis=0) | |
z = np.sum(np.average(np.square(au_f), axis=0)) | |
if z < self.z_threshold: | |
mlufs = float('-inf') | |
else: | |
mlufs = -0.691 + 10*np.log10(z) | |
Mlufs = np.append(Mlufs, mlufs) | |
return Mlufs | |
def get_mlufs_max(self, au): | |
# Get the maxinum momentary LUFS value. | |
return np.amax(self.get_mlufs(au)) | |
def get_ilufs(self, au): | |
# Get the integrated LUFS value. | |
# The audio array will be padded zeros at the end to complete the last window. | |
Mlufs = self.get_mlufs(au) | |
Z0 = Z[Mlufs > -70.0] | |
if Z0.size == 0: | |
ilufs = float('-inf') | |
else: | |
z1 = np.average(Z0) | |
if z1 >= self.z_threshold: | |
Z = Z[Mlufs > -0.691 + 10*np.log10(z1) - 10] | |
else: | |
pass | |
if Z.size == 0: | |
ilufs = float('-inf') | |
else: | |
z2 = np.average(Z) | |
if z2 >= self.z_threshold: | |
ilufs = -0.691 + 10*np.log10(z2) | |
else: | |
ilufs = float('-inf') | |
return ilufs | |
def norm_mlufs_max(self, au, target=-20.0): | |
# Normalize the maxinum momentary LUFS. | |
return au*db2amp(target - self.get_mlufs_max(au)) | |
def norm_ilufs(self, au, target=-23.0): | |
# Normalize the integrated LUFS. | |
return au*db2amp(target - self.get_ilufs(au)) | |
def print_mlufs_max(self, au): | |
# Print the maxinum momentary LUFS value. | |
print(f'mlufs_max = {round(self.get_mlufs_max(au), 4)} LUFS') | |
def print_ilufs(self, au): | |
# Print the integrated LUFS value. | |
print(f'ilufs = {round(self.get_ilufs(au), 4)} LUFS') | |
# usage example to load a sound file and plot it | |
import sys | |
import numpy as np | |
import soundfile as sf | |
from scipy import signal | |
import matplotlib.pyplot as plt | |
from lufsmeter import lufs_meter | |
def read_audio_file(file_path): | |
"""Reads an audio file and returns the audio data and sample rate.""" | |
data, rate = sf.read(file_path) | |
# Mono conversion if stereo | |
if len(data.shape) > 1: | |
data = np.mean(data, axis=1) | |
return data, rate | |
def plot_loudness(momentary_lufs, short_term_lufs, sr, momentary_hop_size, short_term_hop_size): | |
"""Plots the momentary and short-term loudness levels.""" | |
plt.figure(figsize=(10, 6)) | |
# Calculate the time axis for momentary and short-term loudness | |
times_m = np.arange(len(momentary_lufs)) * momentary_hop_size / sr | |
times_s = np.arange(len(short_term_lufs)) * short_term_hop_size / sr | |
# Plot the momentary and short-term loudness levels | |
plt.plot(times_m, momentary_lufs, label='Momentary LUFS', alpha=0.7) | |
plt.plot(times_s, short_term_lufs, label='Short-term LUFS', alpha=0.7) | |
# Configure the plot | |
plt.xlabel('Time (s)') | |
plt.ylabel('Loudness (LUFS)') | |
plt.title('Momentary and Short-term Loudness Over Time') | |
plt.legend() | |
plt.grid(True) | |
plt.show() | |
if __name__ == "__main__": | |
if len(sys.argv) < 2: | |
print("Usage: python script.py <audio_file_path>") | |
sys.exit(1) | |
audio_file_path = sys.argv[1] | |
data, sr = read_audio_file(audio_file_path) | |
# Create LUFS meters for momentary and short-term loudness | |
momentary_meter = lufs_meter(sr, T=0.4) # 400 ms for momentary loudness | |
short_term_meter = lufs_meter(sr, T=3) # 3 seconds for short-term loudness | |
# Calculate loudness | |
momentary_lufs = momentary_meter.get_mlufs(data) | |
short_term_lufs = short_term_meter.get_mlufs(data) | |
# Plotting | |
plot_loudness(momentary_lufs, short_term_lufs, sr, momentary_meter.hop, short_term_meter.hop) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment