-
-
Save Bentroen/4df9b8d5d052f9d14bc1a8531fe49994 to your computer and use it in GitHub Desktop.
| from pydub import AudioSegment | |
| import numpy as np | |
class Mixer:
    """Mix several pydub ``AudioSegment`` objects into a single segment.

    Sounds are registered together with a start position in milliseconds
    (``overlay``) or queued after everything added so far (``append``).
    Nothing is mixed until ``to_audio_segment`` is called, which sums all
    parts in one NumPy pass.
    """

    def __init__(self):
        # (position_ms, sound) pairs, in the order they were added.
        self.parts = []

    def __len__(self):
        """Return the length of the mix in milliseconds (0 when empty)."""
        parts = self._sync()
        if not parts:
            return 0
        frame_rate = parts[0][1].frame_rate
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        return int(1000.0 * frame_count / frame_rate)

    def overlay(self, sound, position=0):
        """Register ``sound`` to start at ``position`` ms; return ``self``."""
        self.parts.append((position, sound))
        return self

    def _sync(self):
        """Return ``[(frame_offset, segment), ...]`` for the registered parts.

        Segments are passed through pydub's ``AudioSegment._sync`` so that
        they can be summed sample by sample.
        NOTE(review): this relies on ``_sync`` returning segments that share
        channels, frame rate and sample width -- confirm against the pydub
        version in use.
        """
        if not self.parts:
            return []
        positions, segs = zip(*self.parts)
        segs = AudioSegment.empty()._sync(*segs)
        # Convert positions using the frame rate *after* syncing; the first
        # segment's original rate may differ from the common one.
        frame_rate = segs[0].frame_rate
        offsets = [int(frame_rate * pos / 1000.0) for pos in positions]
        return list(zip(offsets, segs))

    def append(self, sound):
        """Register ``sound`` at the current end of the mix; return ``self``."""
        return self.overlay(sound, position=len(self))

    def to_audio_segment(self):
        """Mix all registered parts and return them as one ``AudioSegment``.

        The result uses 32-bit samples and is normalized with zero headroom.

        Raises:
            ValueError: if no sound has been added to the mixer.
        """
        parts = self._sync()
        if not parts:
            raise ValueError("cannot mix an empty Mixer: add a sound first")

        template = parts[0][1]
        channels = template.channels
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        sample_count = int(frame_count * channels)

        # Accumulate in 64 bits so that summing many parts (even 32-bit
        # ones) cannot wrap around.
        output = np.zeros(sample_count, dtype="int64")
        for offset, seg in parts:
            raw = seg.get_array_of_samples()
            # Use the array's own type code instead of assuming 16-bit audio.
            samples = np.frombuffer(raw, dtype=raw.typecode)
            start = offset * channels
            output[start:start + len(samples)] += samples

        # Scale down only if the sum no longer fits the 32-bit output format.
        int32_max = np.iinfo(np.int32).max
        peak = int(np.abs(output).max()) if output.size else 0
        if peak > int32_max:
            output = output * (int32_max / peak)
        mixed = output.astype("int32")

        # frame_width must be overridden together with sample_width,
        # otherwise the spawned segment reports a wrong frame count.
        spawned = template._spawn(
            mixed.tobytes(),
            overrides={"sample_width": 4, "frame_width": 4 * channels},
        )
        # The audio is then normalized to occupy the full "height" again.
        return spawned.normalize(headroom=0.0)
Hi @Peda1996! You're absolutely right. The code currently does not account for 32-bit WAV, only 16-bit! I only bothered making it support that because it was enough for what I needed, but I do eventually plan to fix this. :)
Your change is fine! Just keep in mind that the code is using an "oversized" array in order to account for clipping. So if you changed samples to 32-bit, you probably should change output to be 64-bit; otherwise, clipping may occur as it's adding the samples.
Thank you for using the script; glad it got to be useful 😄
Well, 64-bit output won't work that well with pydub, I think. So one possibility would be to rescale the 32 Bit Input Array to 16 bit (in this case it's also automatically normalized)?
samples = np.frombuffer(seg.get_array_of_samples(), dtype="int32")
samples = np.int16(samples/np.max(np.abs(samples)) * 32767)
If normalizing isn't wanted, one could also just use the maximum 32-bit integer value as the divisor:
samples = np.int16(samples/2147483647 * 32767)
16 bit int max value: 32767
32 bit int max value: 2147483647
Hey, thanks for your great work :)
I found a small bug in your code: certain pydub AudioSegments (read directly from .wav files) caused errors. I fixed this issue by replacing
samples = np.frombuffer(seg.get_array_of_samples(), dtype="int16") with samples = np.frombuffer(seg.get_array_of_samples(), dtype="int32").