-
-
Save Bentroen/4df9b8d5d052f9d14bc1a8531fe49994 to your computer and use it in GitHub Desktop.
from pydub import AudioSegment | |
import numpy as np | |
class Mixer:
    """Collect pydub segments at millisecond positions and mix them in one pass.

    Segments are only recorded by ``overlay``/``append``; the actual summing
    happens once, in ``to_audio_segment``.
    """

    def __init__(self):
        # Each entry is a ``(position_ms, segment)`` pair, in insertion order.
        self.parts = []

    def __len__(self):
        """Return the length of the mix in whole milliseconds (0 when empty)."""
        if not self.parts:
            # Nothing to sync; this also lets ``append`` work on a new mixer.
            return 0
        parts = self._sync()
        frame_rate = parts[0][1].frame_rate
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        return int(1000.0 * frame_count / frame_rate)

    def overlay(self, sound, position=0):
        """Register ``sound`` to start ``position`` milliseconds into the mix.

        Returns the mixer itself so calls can be chained.
        """
        self.parts.append((position, sound))
        return self

    def _sync(self):
        """Return ``[(frame_offset, segment), ...]`` for all registered parts.

        The segments are passed through pydub's ``_sync`` so that they share
        channel count, frame rate and sample width. Must not be called while
        ``self.parts`` is empty.
        """
        positions, segs = zip(*self.parts)
        segs = AudioSegment.empty()._sync(*segs)
        # Convert positions with the *synced* frame rate: syncing may change
        # the rate of the first segment, and the offsets must be expressed in
        # the same frames as the synced sample data.
        frame_rate = segs[0].frame_rate
        offsets = [int(frame_rate * pos / 1000.0) for pos in positions]
        return list(zip(offsets, segs))

    def append(self, sound):
        """Register ``sound`` at the current end of the mix.

        Returns the mixer itself, consistent with ``overlay``.
        """
        return self.overlay(sound, position=len(self))

    @staticmethod
    def _sum_samples(placed, sample_count):
        """Sum ``(sample_offset, samples)`` arrays into one int32 array.

        The sum is accumulated in int64 so that adding many full-scale inputs
        (including 32-bit ones) cannot wrap around. If the peak of the sum
        does not fit in int32, the whole mix is scaled down so that it does.
        """
        total = np.zeros(sample_count, dtype=np.int64)
        for start, samples in placed:
            total[start:start + len(samples)] += samples
        limit = np.iinfo(np.int32).max
        peak = int(np.abs(total).max()) if total.size else 0
        if peak > limit:
            total = np.rint(total * (limit / peak))
        return total.astype(np.int32)

    def to_audio_segment(self):
        """Mix all registered parts and return the result as a segment.

        The result uses a sample width of 4 bytes and is normalized with no
        headroom. An empty mixer yields ``AudioSegment.empty()``.
        """
        if not self.parts:
            return AudioSegment.empty()
        parts = self._sync()
        template = parts[0][1]
        channels = template.channels
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        sample_count = int(frame_count * channels)
        placed = []
        for offset, seg in parts:
            raw = seg.get_array_of_samples()
            # Take the dtype from the array's own typecode instead of assuming
            # 16-bit samples, so 8-bit and 32-bit audio is read correctly.
            samples = np.frombuffer(raw, dtype=np.dtype(raw.typecode))
            placed.append((offset * channels, samples))
        mixed = self._sum_samples(placed, sample_count)
        # The audio is then normalized to occupy the full "height" again
        return template._spawn(mixed, overrides={"sample_width": 4}).normalize(headroom=0.0)
Hi @Peda1996! You're absolutely right. The code currently does not account for 32-bit WAV, only 16-bit! I only bothered making it support that because it was enough for what I needed, but I do eventually plan to fix this. :)
Your change is fine! Just keep in mind that the code is using an "oversized" array in order to account for clipping. So if you changed `samples` to 32-bit, you should probably change `output` to be 64-bit; otherwise, clipping may occur as the samples are added together.
Thank you for using the script; glad it got to be useful 😄
Well, 64-bit output won't work that well with pydub, I think. So one possibility would be to rescale the 32-bit input array to 16-bit (in which case it is also automatically normalized):
samples = np.frombuffer(seg.get_array_of_samples(), dtype="int32")
samples = np.int16(samples/np.max(np.abs(samples)) * 32767)
If normalizing isn't wanted, one could also just use the maximum 32-bit integer value as the divisor:
samples = np.int16(samples/2147483647 * 32767)
16 bit int max value: 32767
32 bit int max value: 2147483647
Hey, thanks for your great work :)
I found a small bug in your code, where certain pydub AudioSegments (read directly from .wav files) caused errors. I fixed this issue by replacing
`samples = np.frombuffer(seg.get_array_of_samples(), dtype="int16")`
with
`samples = np.frombuffer(seg.get_array_of_samples(), dtype="int32")`.